diff --git a/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.1.log b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.1.log
new file mode 100644
index 0000000000000000000000000000000000000000..5bbd6da594f649c913c1d078600d4efde7a0592d
--- /dev/null
+++ b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.1.log
@@ -0,0 +1,4111 @@
+# Running on gpub001.delta.ncsa.illinois.edu
+# Started at Fri Jul 14 13:29:16 CDT 2023
+# SLURMD_NODENAME=gpub001
+# SLURM_CLUSTER_NAME=delta
+# SLURM_CONF=/var/spool/slurmd/conf-cache/slurm.conf
+# SLURM_CPUS_ON_NODE=64
+# SLURM_CPUS_PER_TASK=64
+# SLURM_EXPORT_ENV=PATH
+# SLURM_GET_USER_ENV=1
+# SLURM_GPUS_ON_NODE=4
+# SLURM_GTIDS=0
+# SLURM_JOBID=2157595
+# SLURM_JOB_ACCOUNT=bbjs-delta-gpu
+# SLURM_JOB_CPUS_PER_NODE='64(x16)'
+# SLURM_JOB_GID=202
+# SLURM_JOB_GPUS=0,1,2,3
+# SLURM_JOB_ID=2157595
+# SLURM_JOB_NAME=exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.log
+# SLURM_JOB_NODELIST='gpub[001-002,006,008,022,024,026-027,048-051,074,077-079]'
+# SLURM_JOB_NUM_NODES=16
+# SLURM_JOB_PARTITION=gpuA40x4
+# SLURM_JOB_QOS=bbjs-delta-gpu
+# SLURM_JOB_UID=68077
+# SLURM_JOB_USER=peng6
+# SLURM_LOCALID=0
+# SLURM_MEM_PER_NODE=240000
+# SLURM_NNODES=16
+# SLURM_NODEID=0
+# SLURM_NODELIST='gpub[001-002,006,008,022,024,026-027,048-051,074,077-079]'
+# SLURM_NODE_ALIASES='(null)'
+# SLURM_OPEN_MODE=a
+# SLURM_PRIO_PROCESS=0
+# SLURM_PROCID=0
+# SLURM_SUBMIT_DIR=/scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1
+# SLURM_SUBMIT_HOST=dt-login02.delta.internal.ncsa.edu
+# SLURM_TASKS_PER_NODE='1(x16)'
+# SLURM_TASK_PID=1052675
+# SLURM_TOPOLOGY_ADDR=ss00.ss09.gpub001
+# SLURM_TOPOLOGY_ADDR_PATTERN=switch.switch.node
+# SLURM_WORKING_CLUSTER=delta:dt-sched:6817:9728:109
+# srun --export=ALL python3 -m espnet2.bin.s2t_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_82d48caa-19ea-4797-8a82-8af4fa04f369
+/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_82d48caa-19ea-4797-8a82-8af4fa04f369
+[gpub001:0/64] 2023-07-14 13:30:20,482 (distributed_c10d:319) INFO: Added key: store_based_barrier_key:1 to store for rank: 0
+[gpub001:0/64] 2023-07-14 13:30:21,930 (distributed_c10d:353) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 64 nodes.
+[gpub001:0/64] 2023-07-14 13:30:21,964 (s2t:483) INFO: Vocabulary size: 50002 +[gpub001:0/64] 2023-07-14 13:30:35,251 (abs_task:1201) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True +[gpub001:0/64] 2023-07-14 13:30:35,260 (abs_task:1202) INFO: Model structure: +ESPnetS2TModel( + (frontend): DefaultFrontend( + (stft): Stft(n_fft=512, win_length=400, hop_length=160, center=True, normalized=False, onesided=True) + (frontend): Frontend() + (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False) + ) + (specaug): SpecAug( + (freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq) + (time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=10, axis=time) + ) + (normalize): GlobalMVN(stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz, norm_means=True, norm_vars=True) + (encoder): TransformerEncoder( + (embed): Conv2dSubsampling( + (conv): Sequential( + (0): Conv2d(1, 1024, kernel_size=(3, 3), stride=(2, 2)) + (1): ReLU() + (2): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(2, 2)) + (3): ReLU() + ) + (out): Sequential( + (0): Linear(in_features=19456, out_features=1024, bias=True) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + ) + (encoders): MultiSequential( + (0): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, 
inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, 
bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (9): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (10): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (11): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): 
Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (12-23): 12 x EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+    )
+    (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+  )
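The encoder stack printed above is one pre-LN transformer block repeated 24 times: multi-head self-attention and a position-wise feed-forward, each preceded by LayerNorm and wrapped in a residual connection with dropout (the trailing after_norm is the usual companion of normalize_before=True). A minimal PyTorch sketch of one such block matching the printed shapes (d_model=1024, d_ff=4096, p=0.1, eps=1e-12); the head count is not shown in this dump, so n_head=16 is an assumption:

    import torch.nn as nn

    class EncoderLayerSketch(nn.Module):
        # Pre-LN block: x + Drop(SelfAttn(LN(x))), then x + Drop(FFN(LN(x)))
        def __init__(self, d_model=1024, n_head=16, d_ff=4096, p=0.1):
            super().__init__()
            self.self_attn = nn.MultiheadAttention(d_model, n_head, dropout=p,
                                                   batch_first=True)
            self.feed_forward = nn.Sequential(   # (w_1) -> ReLU -> dropout -> (w_2)
                nn.Linear(d_model, d_ff), nn.ReLU(), nn.Dropout(p),
                nn.Linear(d_ff, d_model))
            self.norm1 = nn.LayerNorm(d_model, eps=1e-12)
            self.norm2 = nn.LayerNorm(d_model, eps=1e-12)
            self.dropout = nn.Dropout(p)

        def forward(self, x, pad_mask=None):
            y = self.norm1(x)
            x = x + self.dropout(self.self_attn(y, y, y, key_padding_mask=pad_mask,
                                                need_weights=False)[0])
            return x + self.dropout(self.feed_forward(self.norm2(x)))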
+  (decoder): TransformerDecoder(
+    (embed): Sequential(
+      (0): Embedding(50002, 1024)
+      (1): PositionalEncoding(
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+    )
+    (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+    (output_layer): Linear(in_features=1024, out_features=50002, bias=True)
+    (decoders): MultiSequential(
+      (0-23): 24 x DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+    )
+  )
+  (criterion_att): LabelSmoothingLoss(
+    (criterion): KLDivLoss()
+  )
+  (ctc): CTC(
+    (ctc_lo): Linear(in_features=1024, out_features=50002, bias=True)
+    (ctc_loss): CTCLoss()
+  )
+)
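For reference, the 888.51 M figure in the summary that follows can be reproduced from the printed shapes. A quick count in Python (the conv2d frontend is not shown in this portion of the dump; attributing the leftover ~29 M to a 1024-channel Conv2dSubsampling module is an inference, not read from the log):

    d, ff, vocab = 1024, 4096, 50002
    lin = lambda i, o: i * o + o                  # weight + bias
    attn = 4 * lin(d, d)                          # linear_q/k/v/out
    ffn = lin(d, ff) + lin(ff, d)                 # w_1 + w_2
    ln = 2 * d                                    # LayerNorm gain + bias

    enc = attn + ffn + 2 * ln                     # self_attn, feed_forward, norm1-2
    dec = 2 * attn + ffn + 3 * ln                 # + src_attn and norm3
    total = (24 * enc + 24 * dec                  # both stacks
             + vocab * d                          # decoder Embedding(50002, 1024)
             + 2 * lin(d, vocab)                  # output_layer + ctc_lo
             + 2 * ln)                            # the two after_norm layers
    print(round(total / 1e6, 2))                  # 859.14; +29.37 M frontend = 888.51 M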
+
+Model summary:
+    Class Name: ESPnetS2TModel
+    Total Number of model parameters: 888.51 M
+    Number of trainable parameters: 888.51 M (100.0%)
+    Size: 3.55 GB
+    Type: torch.float32
+[gpub001:0/64] 2023-07-14 13:30:35,260 (abs_task:1205) INFO: Optimizer:
+AdamW (
+Parameter Group 0
+    amsgrad: False
+    betas: [0.9, 0.98]
+    capturable: False
+    eps: 1e-06
+    foreach: None
+    initial_lr: 0.00025
+    lr: 2.5e-08
+    maximize: False
+    weight_decay: 0.0
+)
+[gpub001:0/64] 2023-07-14 13:30:35,260 (abs_task:1206) INFO: Scheduler: WarmupLR(warmup_steps=10000)
+[gpub001:0/64] 2023-07-14 13:30:35,277 (abs_task:1215) INFO: Saving the configuration in exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/config.yaml
+[gpub001:0/64] 2023-07-14 13:30:35,962 (abs_task:1272) INFO: Loading pretrained params from /scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v2/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e18_d18_lr5e-4_warmup20k_raw_bpe50000/valid.acc.ave.pth
+[gpub001:0/64] 2023-07-14 13:30:44,311 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 13:30:44,462 (abs_task:1570) INFO: [valid] dataset:
+ESPnetDataset(
+  speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"}
+  text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"}
+  text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"}
+  text: {"path": "dump/raw/dev/text", "type": "text"}
+  preprocess: )
+[gpub001:0/64] 2023-07-14 13:30:44,462 (abs_task:1571) INFO: [valid] Batch sampler: UnsortedBatchSampler(N-batch=1012, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape,
+[gpub001:0/64] 2023-07-14 13:30:44,463 (abs_task:1572) INFO: [valid] mini-batch sizes summary: N-batch=1012, mean=128.1, min=128, max=129
+[gpub001:0/64] 2023-07-14 13:30:44,955 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 13:30:45,289 (abs_task:1570) INFO: [plot_att] dataset:
+ESPnetDataset(
+  speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"}
+  text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"}
+  text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"}
+  text: {"path": "dump/raw/dev/text", "type": "text"}
+  preprocess: )
+[gpub001:0/64] 2023-07-14 13:30:45,289 (abs_task:1571) INFO: [plot_att] Batch sampler: UnsortedBatchSampler(N-batch=129591, batch_size=1, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape,
+[gpub001:0/64] 2023-07-14 13:30:45,289 (abs_task:1572) INFO: [plot_att] mini-batch sizes summary: N-batch=3, mean=1.0, min=1, max=1
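The seemingly tiny lr of 2.5e-08 in Parameter Group 0 above is just WarmupLR(warmup_steps=10000) evaluated at the first step: the schedule scales the base rate as lr(step) = base * warmup^0.5 * min(step^-0.5, step * warmup^-1.5), i.e. a linear ramp to the configured 2.5e-4 at step 10000 followed by inverse-square-root decay. A sketch of that formula (not the scheduler class itself):

    def warmup_lr(step: int, base: float = 2.5e-4, warmup: int = 10_000) -> float:
        # Noam-style schedule: linear warm-up, then step**-0.5 decay
        return base * warmup**0.5 * min(step**-0.5, step * warmup**-1.5)

    print(warmup_lr(1))        # 2.5e-08  (the value logged above)
    print(warmup_lr(10_000))   # 0.00025  (peak = initial_lr)
    print(warmup_lr(40_000))   # 0.000125 (halved after 4x the warm-up)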
+[gpub001:0/64] 2023-07-14 13:31:11,236 (trainer:159) INFO: The training was resumed using exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/checkpoint.pth
+gpub001:1052798:1052798 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.101<0>
+gpub001:1052798:1052798 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub001:1052798:1052798 [0] NCCL INFO cudaDriverVersion 12010
+NCCL version 2.14.3+cuda11.7
+[gpub001:0/64] 2023-07-14 13:31:16,544 (trainer:284) INFO: 49/60epoch started
+[gpub001:0/64] 2023-07-14 13:31:16,605 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub001:0/64] 2023-07-14 13:31:34,016 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 13:31:37,332 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+  preprocess: )
+[gpub001:0/64] 2023-07-14 13:31:37,332 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpub001:0/64] 2023-07-14 13:31:37,338 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
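The batch-sampler lines follow directly from the shape files: UnsortedBatchSampler chunks the key list without sorting, so N-batch is len(keys) // batch_size and the remainder is folded into existing batches rather than emitted as a short batch, hence min=128, max=129. For the [valid] set, the plot_att sampler shows 129,591 keys, which at batch_size=128 gives the logged N-batch=1012 and mean 128.1. An illustrative re-derivation of that arithmetic (not ESPnet's actual class):

    def unsorted_batches(keys, batch_size):
        n_batch = len(keys) // batch_size          # 129591 // 128 = 1012
        base, extra = divmod(len(keys), n_batch)   # base=128, extra=55 oversized batches
        out, i = [], 0
        for b in range(n_batch):
            size = base + (b < extra)              # 55 batches of 129, 957 of 128
            out.append(keys[i:i + size])
            i += size
        return out

    batches = unsorted_batches([f"utt{i}" for i in range(129_591)], 128)
    print(len(batches), min(map(len, batches)), max(map(len, batches)))  # 1012 128 129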
+gpub050:2739708:2739708 [3] NCCL INFO cudaDriverVersion 12010
+gpub050:2739708:2739708 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.150<0>
+gpub050:2739708:2739708 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub050:2739708:2739778 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.150<0>
+gpub050:2739708:2739778 [3] NCCL INFO Using network IB
+gpub050:2739708:2739778 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub050:2739708:2739778 [3] NCCL INFO Trees [0] -1/-1/-1->43->42 [1] -1/-1/-1->43->42
+gpub050:2739708:2739778 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0
+gpub050:2739708:2739778 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0
+gpub050:2739708:2739778 [3] NCCL INFO Connected all rings
+gpub050:2739708:2739778 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 42[85000] via P2P/IPC
+gpub050:2739708:2739778 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 42[85000] via P2P/IPC
+gpub050:2739708:2739778 [3] NCCL INFO Connected all trees
+gpub050:2739708:2739778 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub050:2739708:2739778 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub050:2739708:2739778 [3] NCCL INFO comm 0x51443e00 rank 43 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+[... analogous NCCL INFO init blocks, differing only in rank IDs and ring/tree links, for ranks 4-7 (gpub002), 28-29 and 31 (gpub027), 40-42 (gpub050), 44-47 (gpub051), and 56-59 (gpub078); each block ends with "... - Init COMPLETE" ...]
eth1:172.28.23.127<0> +gpub027:4034779:4034862 [0] NCCL INFO Using network IB +gpub027:4034779:4034862 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub027:4034779:4034862 [0] NCCL INFO Trees [0] 29/-1/-1->28->24 [1] 29/12/-1->28->60 +gpub027:4034779:4034862 [0] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0 +gpub027:4034779:4034862 [0] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0 +gpub027:4034779:4034862 [0] NCCL INFO Channel 00/0 : 28[7000] -> 29[46000] via P2P/IPC +gpub027:4034779:4034862 [0] NCCL INFO Channel 01/0 : 28[7000] -> 29[46000] via P2P/IPC +gpub027:4034779:4034862 [0] NCCL INFO Connected all rings +gpub027:4034779:4034862 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [receive] via NET/IB/0 +gpub027:4034779:4034862 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [receive] via NET/IB/0 +gpub027:4034779:4034862 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [receive] via NET/IB/0 +gpub027:4034779:4034862 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [send] via NET/IB/0 +gpub027:4034779:4034862 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [send] via NET/IB/0 +gpub027:4034779:4034862 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [send] via NET/IB/0 +gpub027:4034779:4034862 [0] NCCL INFO Connected all trees +gpub027:4034779:4034862 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub027:4034779:4034862 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub027:4034779:4034862 [0] NCCL INFO comm 0xb5e6b1f0 rank 28 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub027:4034782:4034782 [3] NCCL INFO cudaDriverVersion 12010 +gpub027:4034782:4034782 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.127<0> +gpub027:4034782:4034782 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub027:4034782:4034861 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.127<0> +gpub027:4034782:4034861 [3] NCCL INFO Using network IB +gpub027:4034782:4034861 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub027:4034782:4034861 [3] NCCL INFO Trees [0] -1/-1/-1->31->30 [1] -1/-1/-1->31->30 +gpub027:4034782:4034861 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0 +gpub027:4034782:4034861 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0 +gpub027:4034782:4034861 [3] NCCL INFO Connected all rings +gpub027:4034782:4034861 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 30[85000] via P2P/IPC +gpub027:4034782:4034861 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 30[85000] via P2P/IPC +gpub027:4034782:4034861 [3] NCCL INFO Connected all trees +gpub027:4034782:4034861 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub027:4034782:4034861 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub027:4034782:4034861 [3] NCCL INFO comm 0x4f996350 rank 31 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub027:4034781:4034781 [2] NCCL INFO cudaDriverVersion 12010 +gpub027:4034781:4034781 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.127<0> +gpub027:4034781:4034781 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub027:4034781:4034864 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.127<0> +gpub027:4034781:4034864 [2] NCCL INFO Using network IB +gpub027:4034781:4034864 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub027:4034781:4034864 [2] NCCL INFO Trees [0] 31/-1/-1->30->29 [1] 
31/-1/-1->30->29 +gpub027:4034781:4034864 [2] NCCL INFO Channel 00/0 : 30[85000] -> 31[c7000] via P2P/IPC +gpub027:4034781:4034864 [2] NCCL INFO Channel 01/0 : 30[85000] -> 31[c7000] via P2P/IPC +gpub027:4034781:4034864 [2] NCCL INFO Connected all rings +gpub027:4034781:4034864 [2] NCCL INFO Channel 00/0 : 30[85000] -> 29[46000] via P2P/IPC +gpub027:4034781:4034864 [2] NCCL INFO Channel 01/0 : 30[85000] -> 29[46000] via P2P/IPC +gpub027:4034781:4034864 [2] NCCL INFO Connected all trees +gpub027:4034781:4034864 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub027:4034781:4034864 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub027:4034781:4034864 [2] NCCL INFO comm 0x8d940630 rank 30 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub077:2521721:2521721 [3] NCCL INFO cudaDriverVersion 12010 +gpub077:2521721:2521721 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.177<0> +gpub077:2521721:2521721 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub077:2521721:2521790 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.177<0> +gpub077:2521721:2521790 [3] NCCL INFO Using network IB +gpub077:2521721:2521790 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub077:2521721:2521790 [3] NCCL INFO Trees [0] -1/-1/-1->55->54 [1] -1/-1/-1->55->54 +gpub077:2521721:2521790 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0 +gpub077:2521721:2521790 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0 +gpub077:2521721:2521790 [3] NCCL INFO Connected all rings +gpub077:2521721:2521790 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 54[85000] via P2P/IPC +gpub077:2521721:2521790 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 54[85000] via P2P/IPC +gpub077:2521721:2521790 [3] NCCL INFO Connected all trees +gpub077:2521721:2521790 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub077:2521721:2521790 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub077:2521721:2521790 [3] NCCL INFO comm 0x500bb780 rank 55 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub079:3396495:3396495 [2] NCCL INFO cudaDriverVersion 12010 +gpub079:3396495:3396495 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.179<0> +gpub079:3396495:3396495 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub079:3396495:3396576 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.179<0> +gpub079:3396495:3396576 [2] NCCL INFO Using network IB +gpub079:3396495:3396576 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub079:3396495:3396576 [2] NCCL INFO Trees [0] 63/-1/-1->62->61 [1] 63/-1/-1->62->61 +gpub079:3396495:3396576 [2] NCCL INFO Channel 00/0 : 62[85000] -> 63[c7000] via P2P/IPC +gpub079:3396495:3396576 [2] NCCL INFO Channel 01/0 : 62[85000] -> 63[c7000] via P2P/IPC +gpub079:3396495:3396576 [2] NCCL INFO Connected all rings +gpub079:3396495:3396576 [2] NCCL INFO Channel 00/0 : 62[85000] -> 61[46000] via P2P/IPC +gpub079:3396495:3396576 [2] NCCL INFO Channel 01/0 : 62[85000] -> 61[46000] via P2P/IPC +gpub079:3396495:3396576 [2] NCCL INFO Connected all trees +gpub079:3396495:3396576 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub079:3396495:3396576 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub079:3396495:3396576 [2] NCCL INFO comm 0x8e939be0 rank 62 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub006:1859755:1859755 [3] NCCL INFO cudaDriverVersion 12010 
+gpub006:1859755:1859755 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.106<0> +gpub006:1859755:1859755 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub006:1859755:1859833 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.106<0> +gpub006:1859755:1859833 [3] NCCL INFO Using network IB +gpub006:1859755:1859833 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub006:1859755:1859833 [3] NCCL INFO Trees [0] -1/-1/-1->11->10 [1] -1/-1/-1->11->10 +gpub006:1859755:1859833 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0 +gpub006:1859755:1859833 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0 +gpub006:1859755:1859833 [3] NCCL INFO Connected all rings +gpub006:1859755:1859833 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 10[85000] via P2P/IPC +gpub006:1859755:1859833 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 10[85000] via P2P/IPC +gpub006:1859755:1859833 [3] NCCL INFO Connected all trees +gpub006:1859755:1859833 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub006:1859755:1859833 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub006:1859755:1859833 [3] NCCL INFO comm 0x50847890 rank 11 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub049:277663:277663 [1] NCCL INFO cudaDriverVersion 12010 +gpub049:277663:277663 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.149<0> +gpub049:277663:277663 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub049:277663:277744 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.149<0> +gpub049:277663:277744 [1] NCCL INFO Using network IB +gpub049:277663:277744 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub049:277663:277744 [1] NCCL INFO Trees [0] 38/-1/-1->37->36 [1] 38/40/-1->37->36 +gpub049:277663:277744 [1] NCCL INFO Channel 00/0 : 37[46000] -> 38[85000] via P2P/IPC +gpub049:277663:277744 [1] NCCL INFO Channel 01/0 : 37[46000] -> 38[85000] via P2P/IPC +gpub049:277663:277744 [1] NCCL INFO Connected all rings +gpub049:277663:277744 [1] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [send] via NET/IB/0 +gpub049:277663:277744 [1] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [receive] via NET/IB/0 +gpub049:277663:277744 [1] NCCL INFO Channel 00/0 : 37[46000] -> 36[7000] via P2P/IPC +gpub049:277663:277744 [1] NCCL INFO Channel 01/0 : 37[46000] -> 36[7000] via P2P/IPC +gpub049:277663:277744 [1] NCCL INFO Connected all trees +gpub049:277663:277744 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub049:277663:277744 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub049:277663:277744 [1] NCCL INFO comm 0xb77e62d0 rank 37 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub079:3396496:3396496 [3] NCCL INFO cudaDriverVersion 12010 +gpub079:3396496:3396496 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.179<0> +gpub079:3396496:3396496 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub079:3396496:3396574 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.179<0> +gpub079:3396496:3396574 [3] NCCL INFO Using network IB +gpub079:3396496:3396574 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub079:3396496:3396574 [3] NCCL INFO Trees [0] -1/-1/-1->63->62 [1] -1/-1/-1->63->62 +gpub079:3396496:3396574 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0 +gpub079:3396496:3396574 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] 
[send] via NET/IB/0 +gpub079:3396496:3396574 [3] NCCL INFO Connected all rings +gpub079:3396496:3396574 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 62[85000] via P2P/IPC +gpub079:3396496:3396574 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 62[85000] via P2P/IPC +gpub079:3396496:3396574 [3] NCCL INFO Connected all trees +gpub079:3396496:3396574 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub079:3396496:3396574 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub079:3396496:3396574 [3] NCCL INFO comm 0x51317510 rank 63 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub074:4055358:4055358 [2] NCCL INFO cudaDriverVersion 12010 +gpub074:4055358:4055358 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0> +gpub074:4055358:4055358 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub074:4055358:4055429 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0> +gpub074:4055358:4055429 [2] NCCL INFO Using network IB +gpub074:4055358:4055429 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub074:4055358:4055429 [2] NCCL INFO Trees [0] 51/-1/-1->50->49 [1] 51/-1/-1->50->49 +gpub074:4055358:4055429 [2] NCCL INFO Channel 00/0 : 50[85000] -> 51[c7000] via P2P/IPC +gpub074:4055358:4055429 [2] NCCL INFO Channel 01/0 : 50[85000] -> 51[c7000] via P2P/IPC +gpub074:4055358:4055429 [2] NCCL INFO Connected all rings +gpub074:4055358:4055429 [2] NCCL INFO Channel 00/0 : 50[85000] -> 49[46000] via P2P/IPC +gpub074:4055358:4055429 [2] NCCL INFO Channel 01/0 : 50[85000] -> 49[46000] via P2P/IPC +gpub049:277665:277665 [3] NCCL INFO cudaDriverVersion 12010 +gpub049:277665:277665 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.149<0> +gpub049:277665:277665 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub049:277665:277743 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.149<0> +gpub049:277665:277743 [3] NCCL INFO Using network IB +gpub049:277665:277743 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub049:277665:277743 [3] NCCL INFO Trees [0] -1/-1/-1->39->38 [1] -1/-1/-1->39->38 +gpub049:277665:277743 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0 +gpub049:277665:277743 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0 +gpub049:277665:277743 [3] NCCL INFO Connected all rings +gpub049:277665:277743 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 38[85000] via P2P/IPC +gpub049:277665:277743 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 38[85000] via P2P/IPC +gpub074:4055358:4055429 [2] NCCL INFO Connected all trees +gpub074:4055358:4055429 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub074:4055358:4055429 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub074:4055358:4055429 [2] NCCL INFO comm 0x5076e6a0 rank 50 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub049:277665:277743 [3] NCCL INFO Connected all trees +gpub049:277665:277743 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub049:277665:277743 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub049:277665:277743 [3] NCCL INFO comm 0x9d1a0c70 rank 39 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub049:277662:277662 [0] NCCL INFO cudaDriverVersion 12010 +gpub049:277662:277662 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.149<0> +gpub049:277662:277662 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation 
+gpub049:277662:277745 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.149<0> +gpub049:277662:277745 [0] NCCL INFO Using network IB +gpub049:277662:277745 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub049:277662:277745 [0] NCCL INFO Trees [0] 37/-1/-1->36->41 [1] 37/32/-1->36->44 +gpub049:277662:277745 [0] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0 +gpub049:277662:277745 [0] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0 +gpub049:277662:277745 [0] NCCL INFO Channel 00/0 : 36[7000] -> 37[46000] via P2P/IPC +gpub049:277662:277745 [0] NCCL INFO Channel 01/0 : 36[7000] -> 37[46000] via P2P/IPC +gpub049:277662:277745 [0] NCCL INFO Connected all rings +gpub049:277662:277745 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [receive] via NET/IB/0 +gpub049:277662:277745 [0] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [send] via NET/IB/0 +gpub049:277662:277745 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [send] via NET/IB/0 +gpub049:277662:277745 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [receive] via NET/IB/0 +gpub049:277662:277745 [0] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [receive] via NET/IB/0 +gpub049:277662:277745 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [send] via NET/IB/0 +gpub049:277662:277745 [0] NCCL INFO Connected all trees +gpub049:277662:277745 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub049:277662:277745 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub049:277662:277745 [0] NCCL INFO comm 0x50033560 rank 36 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub074:4055359:4055359 [3] NCCL INFO cudaDriverVersion 12010 +gpub074:4055359:4055359 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0> +gpub074:4055359:4055359 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub074:4055359:4055428 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0> +gpub074:4055359:4055428 [3] NCCL INFO Using network IB +gpub074:4055359:4055428 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub074:4055359:4055428 [3] NCCL INFO Trees [0] -1/-1/-1->51->50 [1] -1/-1/-1->51->50 +gpub074:4055359:4055428 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0 +gpub074:4055359:4055428 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0 +gpub074:4055359:4055428 [3] NCCL INFO Connected all rings +gpub074:4055359:4055428 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 50[85000] via P2P/IPC +gpub074:4055359:4055428 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 50[85000] via P2P/IPC +gpub074:4055359:4055428 [3] NCCL INFO Connected all trees +gpub074:4055359:4055428 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub074:4055359:4055428 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub074:4055359:4055428 [3] NCCL INFO comm 0xb59ce3d0 rank 51 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub077:2521719:2521719 [1] NCCL INFO cudaDriverVersion 12010 +gpub077:2521719:2521719 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.177<0> +gpub077:2521719:2521719 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub077:2521719:2521792 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.177<0> +gpub077:2521719:2521792 [1] NCCL INFO Using network IB +gpub077:2521719:2521792 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub077:2521719:2521792 [1] NCCL 
INFO Trees [0] 54/-1/-1->53->52 [1] 54/56/-1->53->52 +gpub077:2521719:2521792 [1] NCCL INFO Channel 00/0 : 53[46000] -> 54[85000] via P2P/IPC +gpub077:2521719:2521792 [1] NCCL INFO Channel 01/0 : 53[46000] -> 54[85000] via P2P/IPC +gpub077:2521719:2521792 [1] NCCL INFO Connected all rings +gpub077:2521719:2521792 [1] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [send] via NET/IB/0 +gpub077:2521719:2521792 [1] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [receive] via NET/IB/0 +gpub077:2521719:2521792 [1] NCCL INFO Channel 00/0 : 53[46000] -> 52[7000] via P2P/IPC +gpub077:2521719:2521792 [1] NCCL INFO Channel 01/0 : 53[46000] -> 52[7000] via P2P/IPC +gpub077:2521719:2521792 [1] NCCL INFO Connected all trees +gpub077:2521719:2521792 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub077:2521719:2521792 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub077:2521719:2521792 [1] NCCL INFO comm 0xb802530 rank 53 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub079:3396493:3396493 [0] NCCL INFO cudaDriverVersion 12010 +gpub079:3396493:3396493 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.179<0> +gpub079:3396493:3396493 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub079:3396493:3396573 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.179<0> +gpub079:3396493:3396573 [0] NCCL INFO Using network IB +gpub079:3396493:3396573 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub079:3396493:3396573 [0] NCCL INFO Trees [0] 61/-1/-1->60->56 [1] 61/28/-1->60->-1 +gpub079:3396493:3396573 [0] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0 +gpub079:3396493:3396573 [0] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0 +gpub079:3396493:3396573 [0] NCCL INFO Channel 00/0 : 60[7000] -> 61[46000] via P2P/IPC +gpub079:3396493:3396573 [0] NCCL INFO Channel 01/0 : 60[7000] -> 61[46000] via P2P/IPC +gpub079:3396493:3396573 [0] NCCL INFO Connected all rings +gpub079:3396493:3396573 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [receive] via NET/IB/0 +gpub079:3396493:3396573 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [receive] via NET/IB/0 +gpub079:3396493:3396573 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [send] via NET/IB/0 +gpub079:3396493:3396573 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [send] via NET/IB/0 +gpub079:3396493:3396573 [0] NCCL INFO Connected all trees +gpub079:3396493:3396573 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub079:3396493:3396573 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub079:3396493:3396573 [0] NCCL INFO comm 0x4f9d83d0 rank 60 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub026:2781297:2781297 [1] NCCL INFO cudaDriverVersion 12010 +gpub026:2781297:2781297 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.126<0> +gpub026:2781297:2781297 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub026:2781297:2781382 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.126<0> +gpub026:2781297:2781382 [1] NCCL INFO Using network IB +gpub026:2781297:2781382 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub026:2781297:2781382 [1] NCCL INFO Trees [0] 26/20/-1->25->24 [1] 26/-1/-1->25->24 +gpub026:2781297:2781382 [1] NCCL INFO Channel 00/0 : 25[46000] -> 26[85000] via P2P/IPC +gpub026:2781297:2781382 [1] NCCL INFO Channel 01/0 : 25[46000] -> 26[85000] via P2P/IPC 
+gpub026:2781297:2781382 [1] NCCL INFO Connected all rings +gpub026:2781297:2781382 [1] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [receive] via NET/IB/0 +gpub026:2781297:2781382 [1] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [send] via NET/IB/0 +gpub026:2781297:2781382 [1] NCCL INFO Channel 00/0 : 25[46000] -> 24[7000] via P2P/IPC +gpub026:2781297:2781382 [1] NCCL INFO Channel 01/0 : 25[46000] -> 24[7000] via P2P/IPC +gpub026:2781297:2781382 [1] NCCL INFO Connected all trees +gpub026:2781297:2781382 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub026:2781297:2781382 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub026:2781297:2781382 [1] NCCL INFO comm 0x50ca5540 rank 25 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub006:1859754:1859754 [2] NCCL INFO cudaDriverVersion 12010 +gpub006:1859754:1859754 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.106<0> +gpub006:1859754:1859754 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub006:1859754:1859834 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.106<0> +gpub006:1859754:1859834 [2] NCCL INFO Using network IB +gpub006:1859754:1859834 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub006:1859754:1859834 [2] NCCL INFO Trees [0] 11/-1/-1->10->9 [1] 11/-1/-1->10->9 +gpub006:1859754:1859834 [2] NCCL INFO Channel 00/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub006:1859754:1859834 [2] NCCL INFO Channel 01/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub006:1859754:1859834 [2] NCCL INFO Connected all rings +gpub006:1859754:1859834 [2] NCCL INFO Channel 00/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub006:1859754:1859834 [2] NCCL INFO Channel 01/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub006:1859754:1859834 [2] NCCL INFO Connected all trees +gpub006:1859754:1859834 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub006:1859754:1859834 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub006:1859754:1859834 [2] NCCL INFO comm 0xa42aef0 rank 10 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub074:4055357:4055357 [1] NCCL INFO cudaDriverVersion 12010 +gpub074:4055357:4055357 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0> +gpub074:4055357:4055357 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub074:4055357:4055427 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0> +gpub074:4055357:4055427 [1] NCCL INFO Using network IB +gpub074:4055357:4055427 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub074:4055357:4055427 [1] NCCL INFO Trees [0] 50/40/-1->49->48 [1] 50/-1/-1->49->48 +gpub074:4055357:4055427 [1] NCCL INFO Channel 00/0 : 49[46000] -> 50[85000] via P2P/IPC +gpub074:4055357:4055427 [1] NCCL INFO Channel 01/0 : 49[46000] -> 50[85000] via P2P/IPC +gpub074:4055357:4055427 [1] NCCL INFO Connected all rings +gpub074:4055357:4055427 [1] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [receive] via NET/IB/0 +gpub074:4055357:4055427 [1] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [send] via NET/IB/0 +gpub074:4055357:4055427 [1] NCCL INFO Channel 00/0 : 49[46000] -> 48[7000] via P2P/IPC +gpub074:4055357:4055427 [1] NCCL INFO Channel 01/0 : 49[46000] -> 48[7000] via P2P/IPC +gpub074:4055357:4055427 [1] NCCL INFO Connected all trees +gpub074:4055357:4055427 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub074:4055357:4055427 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels 
per peer +gpub074:4055357:4055427 [1] NCCL INFO comm 0x8b30550 rank 49 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub079:3396494:3396494 [1] NCCL INFO cudaDriverVersion 12010 +gpub079:3396494:3396494 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.179<0> +gpub079:3396494:3396494 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub079:3396494:3396575 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.179<0> +gpub079:3396494:3396575 [1] NCCL INFO Using network IB +gpub079:3396494:3396575 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub079:3396494:3396575 [1] NCCL INFO Trees [0] 62/-1/-1->61->60 [1] 62/-1/-1->61->60 +gpub079:3396494:3396575 [1] NCCL INFO Channel 00/0 : 61[46000] -> 62[85000] via P2P/IPC +gpub079:3396494:3396575 [1] NCCL INFO Channel 01/0 : 61[46000] -> 62[85000] via P2P/IPC +gpub079:3396494:3396575 [1] NCCL INFO Connected all rings +gpub079:3396494:3396575 [1] NCCL INFO Channel 00/0 : 61[46000] -> 60[7000] via P2P/IPC +gpub079:3396494:3396575 [1] NCCL INFO Channel 01/0 : 61[46000] -> 60[7000] via P2P/IPC +gpub079:3396494:3396575 [1] NCCL INFO Connected all trees +gpub079:3396494:3396575 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub079:3396494:3396575 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub079:3396494:3396575 [1] NCCL INFO comm 0xc163a90 rank 61 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub026:2781299:2781299 [3] NCCL INFO cudaDriverVersion 12010 +gpub026:2781299:2781299 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.126<0> +gpub026:2781299:2781299 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub026:2781299:2781379 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.126<0> +gpub026:2781299:2781379 [3] NCCL INFO Using network IB +gpub026:2781299:2781379 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub026:2781299:2781379 [3] NCCL INFO Trees [0] -1/-1/-1->27->26 [1] -1/-1/-1->27->26 +gpub026:2781299:2781379 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0 +gpub026:2781299:2781379 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0 +gpub026:2781299:2781379 [3] NCCL INFO Connected all rings +gpub026:2781299:2781379 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 26[85000] via P2P/IPC +gpub026:2781299:2781379 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 26[85000] via P2P/IPC +gpub026:2781299:2781379 [3] NCCL INFO Connected all trees +gpub026:2781299:2781379 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub026:2781299:2781379 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub026:2781299:2781379 [3] NCCL INFO comm 0x507c61a0 rank 27 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub077:2521720:2521720 [2] NCCL INFO cudaDriverVersion 12010 +gpub077:2521720:2521720 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.177<0> +gpub077:2521720:2521720 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub077:2521720:2521791 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.177<0> +gpub077:2521720:2521791 [2] NCCL INFO Using network IB +gpub077:2521720:2521791 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub077:2521720:2521791 [2] NCCL INFO Trees [0] 55/-1/-1->54->53 [1] 55/-1/-1->54->53 +gpub077:2521720:2521791 [2] NCCL INFO Channel 00/0 : 54[85000] -> 55[c7000] via P2P/IPC +gpub077:2521720:2521791 [2] NCCL INFO Channel 01/0 : 
54[85000] -> 55[c7000] via P2P/IPC +gpub077:2521720:2521791 [2] NCCL INFO Connected all rings +gpub077:2521720:2521791 [2] NCCL INFO Channel 00/0 : 54[85000] -> 53[46000] via P2P/IPC +gpub077:2521720:2521791 [2] NCCL INFO Channel 01/0 : 54[85000] -> 53[46000] via P2P/IPC +gpub077:2521720:2521791 [2] NCCL INFO Connected all trees +gpub077:2521720:2521791 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub077:2521720:2521791 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub077:2521720:2521791 [2] NCCL INFO comm 0xa4c559d0 rank 54 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub006:1859752:1859752 [0] NCCL INFO cudaDriverVersion 12010 +gpub006:1859752:1859752 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.106<0> +gpub006:1859752:1859752 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub006:1859752:1859836 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.106<0> +gpub006:1859752:1859836 [0] NCCL INFO Using network IB +gpub006:1859752:1859836 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub006:1859752:1859836 [0] NCCL INFO Trees [0] 9/12/-1->8->17 [1] 9/-1/-1->8->5 +gpub006:1859752:1859836 [0] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0 +gpub006:1859752:1859836 [0] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0 +gpub006:1859752:1859836 [0] NCCL INFO Channel 00/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub006:1859752:1859836 [0] NCCL INFO Channel 01/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub006:1859752:1859836 [0] NCCL INFO Connected all rings +gpub006:1859752:1859836 [0] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [receive] via NET/IB/0 +gpub006:1859752:1859836 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [send] via NET/IB/0 +gpub006:1859752:1859836 [0] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [send] via NET/IB/0 +gpub006:1859752:1859836 [0] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [receive] via NET/IB/0 +gpub006:1859752:1859836 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [receive] via NET/IB/0 +gpub006:1859752:1859836 [0] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [send] via NET/IB/0 +gpub006:1859752:1859836 [0] NCCL INFO Connected all trees +gpub006:1859752:1859836 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub006:1859752:1859836 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub006:1859752:1859836 [0] NCCL INFO comm 0x50278a40 rank 8 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub008:2990364:2990364 [0] NCCL INFO cudaDriverVersion 12010 +gpub008:2990364:2990364 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.108<0> +gpub008:2990364:2990364 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub008:2990364:2990440 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.108<0> +gpub008:2990364:2990440 [0] NCCL INFO Using network IB +gpub008:2990364:2990440 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub008:2990364:2990440 [0] NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->28 +gpub008:2990364:2990440 [0] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0 +gpub008:2990364:2990440 [0] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0 +gpub008:2990364:2990440 [0] NCCL INFO Channel 00/0 : 12[7000] -> 13[46000] via P2P/IPC +gpub008:2990364:2990440 [0] NCCL INFO Channel 01/0 : 12[7000] -> 13[46000] via P2P/IPC +gpub008:2990364:2990440 [0] 
NCCL INFO Connected all rings +gpub008:2990364:2990440 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [receive] via NET/IB/0 +gpub008:2990364:2990440 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [receive] via NET/IB/0 +gpub008:2990364:2990440 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [send] via NET/IB/0 +gpub008:2990364:2990440 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [receive] via NET/IB/0 +gpub008:2990364:2990440 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [send] via NET/IB/0 +gpub008:2990364:2990440 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [send] via NET/IB/0 +gpub008:2990364:2990440 [0] NCCL INFO Connected all trees +gpub008:2990364:2990440 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub008:2990364:2990440 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub008:2990364:2990440 [0] NCCL INFO comm 0xa229210 rank 12 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub077:2521718:2521718 [0] NCCL INFO cudaDriverVersion 12010 +gpub077:2521718:2521718 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.177<0> +gpub077:2521718:2521718 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub077:2521718:2521789 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.177<0> +gpub077:2521718:2521789 [0] NCCL INFO Using network IB +gpub077:2521718:2521789 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub077:2521718:2521789 [0] NCCL INFO Trees [0] 53/-1/-1->52->57 [1] 53/48/-1->52->45 +gpub077:2521718:2521789 [0] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0 +gpub077:2521718:2521789 [0] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0 +gpub077:2521718:2521789 [0] NCCL INFO Channel 00/0 : 52[7000] -> 53[46000] via P2P/IPC +gpub077:2521718:2521789 [0] NCCL INFO Channel 01/0 : 52[7000] -> 53[46000] via P2P/IPC +gpub077:2521718:2521789 [0] NCCL INFO Connected all rings +gpub077:2521718:2521789 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [receive] via NET/IB/0 +gpub077:2521718:2521789 [0] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [send] via NET/IB/0 +gpub077:2521718:2521789 [0] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [receive] via NET/IB/0 +gpub077:2521718:2521789 [0] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [send] via NET/IB/0 +gpub077:2521718:2521789 [0] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [receive] via NET/IB/0 +gpub077:2521718:2521789 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [send] via NET/IB/0 +gpub077:2521718:2521789 [0] NCCL INFO Connected all trees +gpub077:2521718:2521789 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub077:2521718:2521789 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub077:2521718:2521789 [0] NCCL INFO comm 0x5162ad90 rank 52 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub008:2990366:2990366 [2] NCCL INFO cudaDriverVersion 12010 +gpub008:2990366:2990366 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.108<0> +gpub008:2990366:2990366 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub008:2990366:2990443 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.108<0> +gpub008:2990366:2990443 [2] NCCL INFO Using network IB +gpub008:2990366:2990443 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub008:2990366:2990443 [2] NCCL INFO Trees [0] 15/-1/-1->14->13 [1] 15/-1/-1->14->13 +gpub008:2990366:2990443 [2] NCCL INFO Channel 00/0 : 
14[85000] -> 15[c7000] via P2P/IPC +gpub008:2990366:2990443 [2] NCCL INFO Channel 01/0 : 14[85000] -> 15[c7000] via P2P/IPC +gpub008:2990366:2990443 [2] NCCL INFO Connected all rings +gpub008:2990366:2990443 [2] NCCL INFO Channel 00/0 : 14[85000] -> 13[46000] via P2P/IPC +gpub008:2990366:2990443 [2] NCCL INFO Channel 01/0 : 14[85000] -> 13[46000] via P2P/IPC +gpub008:2990366:2990443 [2] NCCL INFO Connected all trees +gpub008:2990366:2990443 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub008:2990366:2990443 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub008:2990366:2990443 [2] NCCL INFO comm 0xb6d2c880 rank 14 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub074:4055356:4055356 [0] NCCL INFO cudaDriverVersion 12010 +gpub074:4055356:4055356 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0> +gpub074:4055356:4055356 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub074:4055356:4055430 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0> +gpub074:4055356:4055430 [0] NCCL INFO Using network IB +gpub074:4055356:4055430 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub074:4055356:4055430 [0] NCCL INFO Trees [0] 49/56/-1->48->32 [1] 49/-1/-1->48->52 +gpub074:4055356:4055430 [0] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0 +gpub074:4055356:4055430 [0] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0 +gpub074:4055356:4055430 [0] NCCL INFO Channel 00/0 : 48[7000] -> 49[46000] via P2P/IPC +gpub074:4055356:4055430 [0] NCCL INFO Channel 01/0 : 48[7000] -> 49[46000] via P2P/IPC +gpub074:4055356:4055430 [0] NCCL INFO Connected all rings +gpub074:4055356:4055430 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [send] via NET/IB/0 +gpub074:4055356:4055430 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [send] via NET/IB/0 +gpub074:4055356:4055430 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [receive] via NET/IB/0 +gpub074:4055356:4055430 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [send] via NET/IB/0 +gpub074:4055356:4055430 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [receive] via NET/IB/0 +gpub074:4055356:4055430 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [receive] via NET/IB/0 +gpub074:4055356:4055430 [0] NCCL INFO Connected all trees +gpub074:4055356:4055430 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub074:4055356:4055430 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub074:4055356:4055430 [0] NCCL INFO comm 0x9c0ae50 rank 48 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub006:1859753:1859753 [1] NCCL INFO cudaDriverVersion 12010 +gpub006:1859753:1859753 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.106<0> +gpub006:1859753:1859753 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub006:1859753:1859835 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.106<0> +gpub006:1859753:1859835 [1] NCCL INFO Using network IB +gpub006:1859753:1859835 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub006:1859753:1859835 [1] NCCL INFO Trees [0] 10/4/-1->9->8 [1] 10/-1/-1->9->8 +gpub006:1859753:1859835 [1] NCCL INFO Channel 00/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub006:1859753:1859835 [1] NCCL INFO Channel 01/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub006:1859753:1859835 [1] NCCL INFO Connected all rings +gpub006:1859753:1859835 [1] NCCL INFO Channel 00/0 : 4[7000] -> 
9[46000] [receive] via NET/IB/0 +gpub006:1859753:1859835 [1] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [send] via NET/IB/0 +gpub006:1859753:1859835 [1] NCCL INFO Channel 00/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub006:1859753:1859835 [1] NCCL INFO Channel 01/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub006:1859753:1859835 [1] NCCL INFO Connected all trees +gpub006:1859753:1859835 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub006:1859753:1859835 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub006:1859753:1859835 [1] NCCL INFO comm 0xa3eb5f0 rank 9 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub001:1052799:1052799 [1] NCCL INFO cudaDriverVersion 12010 +gpub001:1052799:1052799 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.101<0> +gpub001:1052799:1052799 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub001:1052799:1052880 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.101<0> +gpub001:1052799:1052880 [1] NCCL INFO Using network IB +gpub001:1052799:1052880 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub001:1052799:1052880 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0 +gpub001:1052799:1052880 [1] NCCL INFO Channel 00/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub001:1052799:1052880 [1] NCCL INFO Channel 01/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub001:1052799:1052880 [1] NCCL INFO Connected all rings +gpub001:1052799:1052880 [1] NCCL INFO Channel 00/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub001:1052799:1052880 [1] NCCL INFO Channel 01/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub001:1052799:1052880 [1] NCCL INFO Connected all trees +gpub001:1052799:1052880 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub001:1052799:1052880 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub001:1052799:1052880 [1] NCCL INFO comm 0x50befe70 rank 1 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub001:1052801:1052801 [3] NCCL INFO cudaDriverVersion 12010 +gpub001:1052801:1052801 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.101<0> +gpub001:1052801:1052801 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub001:1052801:1052879 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.101<0> +gpub001:1052801:1052879 [3] NCCL INFO Using network IB +gpub001:1052801:1052879 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub001:1052801:1052879 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2 +gpub001:1052801:1052879 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0 +gpub001:1052801:1052879 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0 +gpub001:1052801:1052879 [3] NCCL INFO Connected all rings +gpub001:1052801:1052879 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 2[85000] via P2P/IPC +gpub001:1052801:1052879 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 2[85000] via P2P/IPC +gpub001:1052801:1052879 [3] NCCL INFO Connected all trees +gpub001:1052801:1052879 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub001:1052801:1052879 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub001:1052801:1052879 [3] NCCL INFO comm 0xb78dc020 rank 3 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub026:2781298:2781298 [2] NCCL INFO cudaDriverVersion 12010 +gpub026:2781298:2781298 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.126<0> +gpub026:2781298:2781298 [2] NCCL INFO NET/Plugin : No plugin found 
(libnccl-net.so), using internal implementation +gpub026:2781298:2781380 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.126<0> +gpub026:2781298:2781380 [2] NCCL INFO Using network IB +gpub026:2781298:2781380 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub026:2781298:2781380 [2] NCCL INFO Trees [0] 27/-1/-1->26->25 [1] 27/-1/-1->26->25 +gpub026:2781298:2781380 [2] NCCL INFO Channel 00/0 : 26[85000] -> 27[c7000] via P2P/IPC +gpub026:2781298:2781380 [2] NCCL INFO Channel 01/0 : 26[85000] -> 27[c7000] via P2P/IPC +gpub026:2781298:2781380 [2] NCCL INFO Connected all rings +gpub026:2781298:2781380 [2] NCCL INFO Channel 00/0 : 26[85000] -> 25[46000] via P2P/IPC +gpub026:2781298:2781380 [2] NCCL INFO Channel 01/0 : 26[85000] -> 25[46000] via P2P/IPC +gpub049:277664:277664 [2] NCCL INFO cudaDriverVersion 12010 +gpub049:277664:277664 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.149<0> +gpub049:277664:277664 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub049:277664:277742 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.149<0> +gpub049:277664:277742 [2] NCCL INFO Using network IB +gpub049:277664:277742 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub049:277664:277742 [2] NCCL INFO Trees [0] 39/-1/-1->38->37 [1] 39/-1/-1->38->37 +gpub049:277664:277742 [2] NCCL INFO Channel 00/0 : 38[85000] -> 39[c7000] via P2P/IPC +gpub049:277664:277742 [2] NCCL INFO Channel 01/0 : 38[85000] -> 39[c7000] via P2P/IPC +gpub049:277664:277742 [2] NCCL INFO Connected all rings +gpub049:277664:277742 [2] NCCL INFO Channel 00/0 : 38[85000] -> 37[46000] via P2P/IPC +gpub049:277664:277742 [2] NCCL INFO Channel 01/0 : 38[85000] -> 37[46000] via P2P/IPC +gpub049:277664:277742 [2] NCCL INFO Connected all trees +gpub026:2781298:2781380 [2] NCCL INFO Connected all trees +gpub026:2781298:2781380 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub026:2781298:2781380 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub026:2781298:2781380 [2] NCCL INFO comm 0x8e36d550 rank 26 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub049:277664:277742 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub049:277664:277742 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub049:277664:277742 [2] NCCL INFO comm 0x92096c0 rank 38 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub008:2990367:2990367 [3] NCCL INFO cudaDriverVersion 12010 +gpub008:2990367:2990367 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.108<0> +gpub008:2990367:2990367 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub008:2990367:2990442 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.108<0> +gpub008:2990367:2990442 [3] NCCL INFO Using network IB +gpub008:2990367:2990442 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub008:2990367:2990442 [3] NCCL INFO Trees [0] -1/-1/-1->15->14 [1] -1/-1/-1->15->14 +gpub008:2990367:2990442 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0 +gpub008:2990367:2990442 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0 +gpub008:2990367:2990442 [3] NCCL INFO Connected all rings +gpub008:2990367:2990442 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub008:2990367:2990442 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub008:2990367:2990442 [3] NCCL INFO Connected all trees +gpub008:2990367:2990442 [3] NCCL 
INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub008:2990367:2990442 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub008:2990367:2990442 [3] NCCL INFO comm 0x4fa470f0 rank 15 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub001:1052798:1052882 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.101<0> +gpub001:1052798:1052882 [0] NCCL INFO Using network IB +gpub001:1052798:1052882 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub001:1052798:1052882 [0] NCCL INFO Channel 00/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpub001:1052798:1052882 [0] NCCL INFO Channel 01/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpub001:1052798:1052882 [0] NCCL INFO Trees [0] 1/32/-1->0->-1 [1] 1/-1/-1->0->4 +gpub001:1052798:1052882 [0] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0 +gpub001:1052798:1052882 [0] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0 +gpub001:1052798:1052882 [0] NCCL INFO Channel 00/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub001:1052798:1052882 [0] NCCL INFO Channel 01/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub001:1052798:1052882 [0] NCCL INFO Connected all rings +gpub001:1052798:1052882 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [send] via NET/IB/0 +gpub001:1052798:1052882 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [receive] via NET/IB/0 +gpub001:1052798:1052882 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [send] via NET/IB/0 +gpub001:1052798:1052882 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [receive] via NET/IB/0 +gpub001:1052798:1052882 [0] NCCL INFO Connected all trees +gpub001:1052798:1052882 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub001:1052798:1052882 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub001:1052798:1052882 [0] NCCL INFO comm 0x50dde690 rank 0 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub022:106667:106667 [1] NCCL INFO cudaDriverVersion 12010 +gpub022:106667:106667 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.122<0> +gpub022:106667:106667 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub022:106667:106747 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.122<0> +gpub022:106667:106747 [1] NCCL INFO Using network IB +gpub022:106667:106747 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub022:106667:106747 [1] NCCL INFO Trees [0] 18/8/-1->17->16 [1] 18/-1/-1->17->16 +gpub022:106667:106747 [1] NCCL INFO Channel 00/0 : 17[46000] -> 18[85000] via P2P/IPC +gpub022:106667:106747 [1] NCCL INFO Channel 01/0 : 17[46000] -> 18[85000] via P2P/IPC +gpub022:106667:106747 [1] NCCL INFO Connected all rings +gpub022:106667:106747 [1] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [receive] via NET/IB/0 +gpub022:106667:106747 [1] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [send] via NET/IB/0 +gpub022:106667:106747 [1] NCCL INFO Channel 00/0 : 17[46000] -> 16[7000] via P2P/IPC +gpub022:106667:106747 [1] NCCL INFO Channel 01/0 : 17[46000] -> 16[7000] via P2P/IPC +gpub022:106667:106747 [1] NCCL INFO Connected all trees +gpub022:106667:106747 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub022:106667:106747 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub022:106667:106747 [1] NCCL INFO comm 0x8f3e3330 rank 17 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub008:2990365:2990365 [1] NCCL INFO cudaDriverVersion 12010 +gpub008:2990365:2990365 [1] 
NCCL INFO Bootstrap : Using eth1:172.28.23.108<0> +gpub008:2990365:2990365 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub008:2990365:2990441 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.108<0> +gpub008:2990365:2990441 [1] NCCL INFO Using network IB +gpub008:2990365:2990441 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub008:2990365:2990441 [1] NCCL INFO Trees [0] 14/-1/-1->13->12 [1] 14/20/-1->13->12 +gpub008:2990365:2990441 [1] NCCL INFO Channel 00/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub008:2990365:2990441 [1] NCCL INFO Channel 01/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub008:2990365:2990441 [1] NCCL INFO Connected all rings +gpub008:2990365:2990441 [1] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [send] via NET/IB/0 +gpub008:2990365:2990441 [1] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [receive] via NET/IB/0 +gpub008:2990365:2990441 [1] NCCL INFO Channel 00/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub008:2990365:2990441 [1] NCCL INFO Channel 01/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub008:2990365:2990441 [1] NCCL INFO Connected all trees +gpub008:2990365:2990441 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub008:2990365:2990441 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub008:2990365:2990441 [1] NCCL INFO comm 0x98eccf0 rank 13 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub024:541987:541987 [2] NCCL INFO cudaDriverVersion 12010 +gpub024:541987:541987 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.124<0> +gpub024:541987:541987 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub024:541987:542066 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.124<0> +gpub024:541987:542066 [2] NCCL INFO Using network IB +gpub024:541987:542066 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub024:541987:542066 [2] NCCL INFO Trees [0] 23/-1/-1->22->21 [1] 23/-1/-1->22->21 +gpub024:541987:542066 [2] NCCL INFO Channel 00/0 : 22[85000] -> 23[c7000] via P2P/IPC +gpub024:541987:542066 [2] NCCL INFO Channel 01/0 : 22[85000] -> 23[c7000] via P2P/IPC +gpub024:541987:542066 [2] NCCL INFO Connected all rings +gpub024:541987:542066 [2] NCCL INFO Channel 00/0 : 22[85000] -> 21[46000] via P2P/IPC +gpub024:541987:542066 [2] NCCL INFO Channel 01/0 : 22[85000] -> 21[46000] via P2P/IPC +gpub024:541987:542066 [2] NCCL INFO Connected all trees +gpub024:541987:542066 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub024:541987:542066 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub024:541987:542066 [2] NCCL INFO comm 0x505ff970 rank 22 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub001:1052800:1052800 [2] NCCL INFO cudaDriverVersion 12010 +gpub001:1052800:1052800 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.101<0> +gpub001:1052800:1052800 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub001:1052800:1052881 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.101<0> +gpub001:1052800:1052881 [2] NCCL INFO Using network IB +gpub001:1052800:1052881 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub001:1052800:1052881 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1 +gpub001:1052800:1052881 [2] NCCL INFO Channel 00/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub001:1052800:1052881 [2] NCCL INFO Channel 01/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub001:1052800:1052881 [2] NCCL 
INFO Connected all rings +gpub001:1052800:1052881 [2] NCCL INFO Channel 00/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub001:1052800:1052881 [2] NCCL INFO Channel 01/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub001:1052800:1052881 [2] NCCL INFO Connected all trees +gpub001:1052800:1052881 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub001:1052800:1052881 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub001:1052800:1052881 [2] NCCL INFO comm 0x8e66c510 rank 2 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub022:106669:106669 [3] NCCL INFO cudaDriverVersion 12010 +gpub022:106669:106669 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.122<0> +gpub022:106669:106669 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub022:106669:106748 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.122<0> +gpub022:106669:106748 [3] NCCL INFO Using network IB +gpub022:106669:106748 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub022:106669:106748 [3] NCCL INFO Trees [0] -1/-1/-1->19->18 [1] -1/-1/-1->19->18 +gpub022:106669:106748 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0 +gpub022:106669:106748 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0 +gpub022:106669:106748 [3] NCCL INFO Connected all rings +gpub022:106669:106748 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 18[85000] via P2P/IPC +gpub022:106669:106748 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 18[85000] via P2P/IPC +gpub022:106669:106748 [3] NCCL INFO Connected all trees +gpub022:106669:106748 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub022:106669:106748 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub022:106669:106748 [3] NCCL INFO comm 0x4f1d7190 rank 19 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub026:2781296:2781296 [0] NCCL INFO cudaDriverVersion 12010 +gpub026:2781296:2781296 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.126<0> +gpub026:2781296:2781296 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub026:2781296:2781381 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.126<0> +gpub026:2781296:2781381 [0] NCCL INFO Using network IB +gpub026:2781296:2781381 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub026:2781296:2781381 [0] NCCL INFO Trees [0] 25/28/-1->24->16 [1] 25/-1/-1->24->21 +gpub026:2781296:2781381 [0] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0 +gpub026:2781296:2781381 [0] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0 +gpub026:2781296:2781381 [0] NCCL INFO Channel 00/0 : 24[7000] -> 25[46000] via P2P/IPC +gpub026:2781296:2781381 [0] NCCL INFO Channel 01/0 : 24[7000] -> 25[46000] via P2P/IPC +gpub026:2781296:2781381 [0] NCCL INFO Connected all rings +gpub026:2781296:2781381 [0] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [receive] via NET/IB/0 +gpub026:2781296:2781381 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [send] via NET/IB/0 +gpub026:2781296:2781381 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [receive] via NET/IB/0 +gpub026:2781296:2781381 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [send] via NET/IB/0 +gpub026:2781296:2781381 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [receive] via NET/IB/0 +gpub026:2781296:2781381 [0] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [send] via NET/IB/0 +gpub026:2781296:2781381 [0] NCCL INFO Connected all trees 
+gpub026:2781296:2781381 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub026:2781296:2781381 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub026:2781296:2781381 [0] NCCL INFO comm 0xaebd9cd0 rank 24 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub024:541985:541985 [0] NCCL INFO cudaDriverVersion 12010 +gpub024:541985:541985 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.124<0> +gpub024:541985:541985 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub024:541985:542068 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.124<0> +gpub024:541985:542068 [0] NCCL INFO Using network IB +gpub024:541985:542068 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub024:541985:542068 [0] NCCL INFO Trees [0] 21/-1/-1->20->25 [1] 21/16/-1->20->13 +gpub024:541985:542068 [0] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0 +gpub024:541985:542068 [0] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0 +gpub024:541985:542068 [0] NCCL INFO Channel 00/0 : 20[7000] -> 21[46000] via P2P/IPC +gpub024:541985:542068 [0] NCCL INFO Channel 01/0 : 20[7000] -> 21[46000] via P2P/IPC +gpub024:541985:542068 [0] NCCL INFO Connected all rings +gpub024:541985:542068 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [receive] via NET/IB/0 +gpub024:541985:542068 [0] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [send] via NET/IB/0 +gpub024:541985:542068 [0] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [receive] via NET/IB/0 +gpub024:541985:542068 [0] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [send] via NET/IB/0 +gpub024:541985:542068 [0] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [receive] via NET/IB/0 +gpub024:541985:542068 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [send] via NET/IB/0 +gpub024:541985:542068 [0] NCCL INFO Connected all trees +gpub024:541985:542068 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub024:541985:542068 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub024:541985:542068 [0] NCCL INFO comm 0x4ffe64c0 rank 20 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub022:106668:106668 [2] NCCL INFO cudaDriverVersion 12010 +gpub022:106668:106668 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.122<0> +gpub022:106668:106668 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub022:106668:106749 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.122<0> +gpub022:106668:106749 [2] NCCL INFO Using network IB +gpub022:106668:106749 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub022:106668:106749 [2] NCCL INFO Trees [0] 19/-1/-1->18->17 [1] 19/-1/-1->18->17 +gpub022:106668:106749 [2] NCCL INFO Channel 00/0 : 18[85000] -> 19[c7000] via P2P/IPC +gpub022:106668:106749 [2] NCCL INFO Channel 01/0 : 18[85000] -> 19[c7000] via P2P/IPC +gpub022:106668:106749 [2] NCCL INFO Connected all rings +gpub022:106668:106749 [2] NCCL INFO Channel 00/0 : 18[85000] -> 17[46000] via P2P/IPC +gpub022:106668:106749 [2] NCCL INFO Channel 01/0 : 18[85000] -> 17[46000] via P2P/IPC +gpub022:106668:106749 [2] NCCL INFO Connected all trees +gpub022:106668:106749 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub022:106668:106749 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub022:106668:106749 [2] NCCL INFO comm 0x4fd27690 rank 18 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub024:541988:541988 [3] NCCL 
INFO cudaDriverVersion 12010 +gpub024:541988:541988 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.124<0> +gpub024:541988:541988 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub024:541988:542067 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.124<0> +gpub024:541988:542067 [3] NCCL INFO Using network IB +gpub024:541988:542067 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub024:541988:542067 [3] NCCL INFO Trees [0] -1/-1/-1->23->22 [1] -1/-1/-1->23->22 +gpub024:541988:542067 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0 +gpub024:541988:542067 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0 +gpub024:541988:542067 [3] NCCL INFO Connected all rings +gpub024:541988:542067 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 22[85000] via P2P/IPC +gpub024:541988:542067 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 22[85000] via P2P/IPC +gpub024:541988:542067 [3] NCCL INFO Connected all trees +gpub024:541988:542067 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub024:541988:542067 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub024:541988:542067 [3] NCCL INFO comm 0xb65fe8d0 rank 23 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub048:3933786:3933786 [3] NCCL INFO cudaDriverVersion 12010 +gpub048:3933786:3933786 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.148<0> +gpub048:3933786:3933786 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub048:3933786:3933849 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.148<0> +gpub048:3933786:3933849 [3] NCCL INFO Using network IB +gpub048:3933786:3933849 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub048:3933786:3933849 [3] NCCL INFO Trees [0] -1/-1/-1->35->34 [1] -1/-1/-1->35->34 +gpub048:3933786:3933849 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0 +gpub048:3933786:3933849 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0 +gpub048:3933786:3933849 [3] NCCL INFO Connected all rings +gpub048:3933786:3933849 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 34[85000] via P2P/IPC +gpub048:3933786:3933849 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 34[85000] via P2P/IPC +gpub048:3933786:3933849 [3] NCCL INFO Connected all trees +gpub048:3933786:3933849 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub048:3933786:3933849 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub048:3933786:3933849 [3] NCCL INFO comm 0x8e08e110 rank 35 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub048:3933785:3933785 [2] NCCL INFO cudaDriverVersion 12010 +gpub048:3933785:3933785 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.148<0> +gpub048:3933785:3933785 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub048:3933785:3933846 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.148<0> +gpub048:3933785:3933846 [2] NCCL INFO Using network IB +gpub048:3933785:3933846 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub048:3933785:3933846 [2] NCCL INFO Trees [0] 35/-1/-1->34->33 [1] 35/-1/-1->34->33 +gpub048:3933785:3933846 [2] NCCL INFO Channel 00/0 : 34[85000] -> 35[c7000] via P2P/IPC +gpub048:3933785:3933846 [2] NCCL INFO Channel 01/0 : 34[85000] -> 35[c7000] via P2P/IPC +gpub048:3933785:3933846 [2] NCCL INFO Connected all rings +gpub048:3933785:3933846 [2] NCCL INFO Channel 00/0 : 34[85000] -> 33[46000] via 
P2P/IPC +gpub048:3933785:3933846 [2] NCCL INFO Channel 01/0 : 34[85000] -> 33[46000] via P2P/IPC +gpub048:3933785:3933846 [2] NCCL INFO Connected all trees +gpub048:3933785:3933846 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub048:3933785:3933846 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub048:3933785:3933846 [2] NCCL INFO comm 0xb9dce190 rank 34 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub048:3933784:3933784 [1] NCCL INFO cudaDriverVersion 12010 +gpub048:3933784:3933784 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.148<0> +gpub048:3933784:3933784 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub048:3933784:3933848 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.148<0> +gpub048:3933784:3933848 [1] NCCL INFO Using network IB +gpub048:3933784:3933848 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub048:3933784:3933848 [1] NCCL INFO Trees [0] 34/16/-1->33->32 [1] 34/-1/-1->33->32 +gpub048:3933784:3933848 [1] NCCL INFO Channel 00/0 : 33[46000] -> 34[85000] via P2P/IPC +gpub048:3933784:3933848 [1] NCCL INFO Channel 01/0 : 33[46000] -> 34[85000] via P2P/IPC +gpub048:3933784:3933848 [1] NCCL INFO Connected all rings +gpub048:3933784:3933848 [1] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [receive] via NET/IB/0 +gpub048:3933784:3933848 [1] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [send] via NET/IB/0 +gpub048:3933784:3933848 [1] NCCL INFO Channel 00/0 : 33[46000] -> 32[7000] via P2P/IPC +gpub048:3933784:3933848 [1] NCCL INFO Channel 01/0 : 33[46000] -> 32[7000] via P2P/IPC +gpub048:3933784:3933848 [1] NCCL INFO Connected all trees +gpub048:3933784:3933848 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub048:3933784:3933848 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub048:3933784:3933848 [1] NCCL INFO comm 0x9d3ee1d0 rank 33 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub024:541986:541986 [1] NCCL INFO cudaDriverVersion 12010 +gpub024:541986:541986 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.124<0> +gpub024:541986:541986 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub024:541986:542065 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.124<0> +gpub024:541986:542065 [1] NCCL INFO Using network IB +gpub024:541986:542065 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub024:541986:542065 [1] NCCL INFO Trees [0] 22/-1/-1->21->20 [1] 22/24/-1->21->20 +gpub024:541986:542065 [1] NCCL INFO Channel 00/0 : 21[46000] -> 22[85000] via P2P/IPC +gpub024:541986:542065 [1] NCCL INFO Channel 01/0 : 21[46000] -> 22[85000] via P2P/IPC +gpub024:541986:542065 [1] NCCL INFO Connected all rings +gpub024:541986:542065 [1] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [send] via NET/IB/0 +gpub024:541986:542065 [1] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [receive] via NET/IB/0 +gpub024:541986:542065 [1] NCCL INFO Channel 00/0 : 21[46000] -> 20[7000] via P2P/IPC +gpub024:541986:542065 [1] NCCL INFO Channel 01/0 : 21[46000] -> 20[7000] via P2P/IPC +gpub024:541986:542065 [1] NCCL INFO Connected all trees +gpub024:541986:542065 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub024:541986:542065 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub024:541986:542065 [1] NCCL INFO comm 0x8c61ca80 rank 21 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub048:3933783:3933783 [0] NCCL INFO 
cudaDriverVersion 12010 +gpub048:3933783:3933783 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.148<0> +gpub048:3933783:3933783 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub048:3933783:3933847 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.148<0> +gpub048:3933783:3933847 [0] NCCL INFO Using network IB +gpub048:3933783:3933847 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub048:3933783:3933847 [0] NCCL INFO Trees [0] 33/48/-1->32->0 [1] 33/-1/-1->32->36 +gpub048:3933783:3933847 [0] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0 +gpub048:3933783:3933847 [0] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0 +gpub048:3933783:3933847 [0] NCCL INFO Channel 00/0 : 32[7000] -> 33[46000] via P2P/IPC +gpub048:3933783:3933847 [0] NCCL INFO Channel 01/0 : 32[7000] -> 33[46000] via P2P/IPC +gpub048:3933783:3933847 [0] NCCL INFO Connected all rings +gpub048:3933783:3933847 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [send] via NET/IB/0 +gpub048:3933783:3933847 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [send] via NET/IB/0 +gpub048:3933783:3933847 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [receive] via NET/IB/0 +gpub048:3933783:3933847 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [send] via NET/IB/0 +gpub048:3933783:3933847 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [receive] via NET/IB/0 +gpub048:3933783:3933847 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [receive] via NET/IB/0 +gpub048:3933783:3933847 [0] NCCL INFO Connected all trees +gpub048:3933783:3933847 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub048:3933783:3933847 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub048:3933783:3933847 [0] NCCL INFO comm 0x8d070d10 rank 32 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub022:106666:106666 [0] NCCL INFO cudaDriverVersion 12010 +gpub022:106666:106666 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.122<0> +gpub022:106666:106666 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub022:106666:106746 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.122<0> +gpub022:106666:106746 [0] NCCL INFO Using network IB +gpub022:106666:106746 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub022:106666:106746 [0] NCCL INFO Trees [0] 17/24/-1->16->33 [1] 17/-1/-1->16->20 +gpub022:106666:106746 [0] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0 +gpub022:106666:106746 [0] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0 +gpub022:106666:106746 [0] NCCL INFO Channel 00/0 : 16[7000] -> 17[46000] via P2P/IPC +gpub022:106666:106746 [0] NCCL INFO Channel 01/0 : 16[7000] -> 17[46000] via P2P/IPC +gpub022:106666:106746 [0] NCCL INFO Connected all rings +gpub022:106666:106746 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [send] via NET/IB/0 +gpub022:106666:106746 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [send] via NET/IB/0 +gpub022:106666:106746 [0] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [send] via NET/IB/0 +gpub022:106666:106746 [0] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [receive] via NET/IB/0 +gpub022:106666:106746 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [receive] via NET/IB/0 +gpub022:106666:106746 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [receive] via NET/IB/0 +gpub022:106666:106746 [0] NCCL INFO Connected all trees 
+gpub022:106666:106746 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub022:106666:106746 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub022:106666:106746 [0] NCCL INFO comm 0x4ef16f50 rank 16 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
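The block above is NCCL's per-rank initialization report (bootstrap interface, IB/RoCE transport, CPU affinity, ring/tree topology, channel wiring), printed because NCCL debug logging is enabled for this job. A minimal sketch of turning on the same output from Python, assuming the standard NCCL_DEBUG environment variables (the actual launch script is not shown in this log):

    import os

    # NCCL reads these when the communicator is created, so they must be set
    # before torch.distributed.init_process_group() (or exported by the launcher).
    os.environ["NCCL_DEBUG"] = "INFO"          # emit INIT/topology lines like those above
    os.environ["NCCL_DEBUG_SUBSYS"] = "INIT"   # optional: restrict output to one subsystem

    import torch.distributed as dist

    # Rank, world size, and rendezvous address come from the launcher (srun in this job).
    dist.init_process_group(backend="nccl")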
+[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
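Each DDP process prints this reducer warning at the first training step: the trainer constructed DistributedDataParallel with find_unused_parameters=True, but every parameter received a gradient, so the extra autograd-graph walk is pure overhead. A minimal, self-contained sketch of the setting the warning refers to (single-process gloo group so it runs anywhere; ESPnet's actual trainer wiring is not shown in this log):

    import torch
    import torch.distributed as dist
    from torch.nn.parallel import DistributedDataParallel as DDP

    # One-process group just for illustration; real jobs get rank/world size
    # from the launcher.
    dist.init_process_group("gloo", init_method="tcp://127.0.0.1:29500",
                            rank=0, world_size=1)

    model = torch.nn.Linear(8, 4)
    # find_unused_parameters=True makes DDP traverse the autograd graph every
    # iteration looking for parameters that got no gradient; the warning says
    # that search found nothing, so False would be cheaper here.
    ddp = DDP(model, find_unused_parameters=True)

    out = ddp(torch.randn(2, 8)).sum()
    out.backward()  # triggers the reducer pass the warning talks about
    dist.destroy_process_group()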
+[gpub001:0/64] 2023-07-14 13:38:31,549 (trainer:732) INFO: 49epoch:train:1-100batch: iter_time=1.254, forward_time=0.216, loss_ctc=75.424, loss_att=56.205, acc=0.707, loss=61.970, backward_time=1.042, grad_norm=126.161, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.157e-05, train_time=8.698 +[gpub001:0/64] 2023-07-14 13:40:47,428 (trainer:732) INFO: 49epoch:train:101-200batch: iter_time=1.324e-04, forward_time=0.143, loss_ctc=78.109, loss_att=58.369, acc=0.696, loss=64.291, backward_time=1.027, grad_norm=156.563, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.156e-05, train_time=2.718 +[gpub001:0/64] 2023-07-14 13:43:04,326 (trainer:732) INFO: 49epoch:train:201-300batch: iter_time=1.368e-04, forward_time=0.144, loss_ctc=71.274, loss_att=53.833, acc=0.706, loss=59.065, backward_time=1.034, grad_norm=117.395, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.156e-05, train_time=2.738 +[gpub001:0/64] 2023-07-14 13:45:19,908 (trainer:732) INFO: 49epoch:train:301-400batch: iter_time=1.152e-04, forward_time=0.140, loss_ctc=82.935, loss_att=67.130, acc=0.686, loss=71.872, backward_time=1.023, grad_norm=143.181, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.155e-05, train_time=2.711 +[gpub001:0/64] 2023-07-14 13:47:38,398 (trainer:732) INFO: 49epoch:train:401-500batch: iter_time=1.057e-04, forward_time=0.140, loss_ctc=67.558, loss_att=49.800, acc=0.725, loss=55.128, backward_time=1.028, grad_norm=137.364, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.155e-05, train_time=2.770 +[gpub001:0/64] 2023-07-14 13:49:52,942 (trainer:732) INFO: 49epoch:train:501-600batch: iter_time=1.077e-04, forward_time=0.139, loss_ctc=67.201, loss_att=46.260, acc=0.720, loss=52.542, backward_time=1.019, grad_norm=114.336, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.154e-05, train_time=2.691 +[gpub001:0/64] 2023-07-14 13:52:16,838 (trainer:732) INFO: 49epoch:train:601-700batch: iter_time=1.205e-04, forward_time=0.142, loss_ctc=70.668, loss_att=51.546, acc=0.714, loss=57.283, backward_time=1.035, grad_norm=119.406, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.154e-05, train_time=2.878 +[gpub001:0/64] 2023-07-14 13:54:35,492 (trainer:732) INFO: 49epoch:train:701-800batch: iter_time=1.298e-04, forward_time=0.142, loss_ctc=61.423, loss_att=43.823, acc=0.717, loss=49.103, backward_time=1.026, grad_norm=107.937, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.153e-05, train_time=2.773 +[gpub001:0/64] 2023-07-14 13:55:27,347 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
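In the trainer records above, loss is the hybrid CTC/attention objective, loss = w * loss_ctc + (1 - w) * loss_att. The weight is not printed in this log, but w = 0.3 reproduces the reported values to rounding; a quick check, assuming that weight (inferred from the numbers, not read from the config file):

    # Recompute the logged `loss` from `loss_ctc` and `loss_att` for the first
    # two 100-batch windows, assuming ctc_weight = 0.3 (an inference, not a
    # quoted config value).
    ctc_weight = 0.3
    for loss_ctc, loss_att, logged in [(75.424, 56.205, 61.970),
                                       (78.109, 58.369, 64.291)]:
        loss = ctc_weight * loss_ctc + (1 - ctc_weight) * loss_att
        print(f"computed={loss:.3f} logged={logged:.3f}")
    # -> computed=61.971 logged=61.970 (rounding) and computed=64.291 logged=64.291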
+[gpub001:0/64] 2023-07-14 13:55:45,050 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-14 13:55:48,387 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-14 13:55:48,388 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub001:0/64] 2023-07-14 13:55:48,394 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-14 14:01:54,878 (trainer:732) INFO: 49epoch:train:801-900batch: iter_time=1.305, forward_time=0.165, loss_ctc=83.766, loss_att=63.502, acc=0.709, loss=69.582, backward_time=1.037, grad_norm=161.753, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.153e-05, train_time=8.787 +[gpub001:0/64] 2023-07-14 14:04:11,115 (trainer:732) INFO: 49epoch:train:901-1000batch: iter_time=1.176e-04, forward_time=0.143, loss_ctc=74.996, loss_att=53.416, acc=0.702, loss=59.890, backward_time=1.026, grad_norm=131.720, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.152e-05, train_time=2.725 +[gpub001:0/64] 2023-07-14 14:06:26,981 (trainer:732) INFO: 49epoch:train:1001-1100batch: iter_time=1.237e-04, forward_time=0.143, loss_ctc=71.753, loss_att=56.326, acc=0.702, loss=60.954, backward_time=1.025, grad_norm=130.090, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.152e-05, train_time=2.717 +[gpub001:0/64] 2023-07-14 14:08:43,048 (trainer:732) INFO: 49epoch:train:1101-1200batch: iter_time=1.122e-04, forward_time=0.142, loss_ctc=79.119, loss_att=61.363, acc=0.706, loss=66.690, backward_time=1.027, grad_norm=115.657, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.151e-05, train_time=2.721 +[gpub001:0/64] 2023-07-14 14:10:58,405 (trainer:732) INFO: 49epoch:train:1201-1300batch: iter_time=1.299e-04, forward_time=0.143, loss_ctc=72.908, loss_att=53.208, acc=0.712, loss=59.118, backward_time=1.023, grad_norm=115.476, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.150e-05, train_time=2.707 +[gpub001:0/64] 2023-07-14 14:13:13,436 (trainer:732) INFO: 49epoch:train:1301-1400batch: iter_time=1.351e-04, forward_time=0.142, loss_ctc=61.698, loss_att=43.601, acc=0.720, loss=49.030, backward_time=1.022, grad_norm=118.028, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.150e-05, train_time=2.700 +[gpub001:0/64] 2023-07-14 14:15:28,715 (trainer:732) INFO: 49epoch:train:1401-1500batch: iter_time=1.272e-04, forward_time=0.143, loss_ctc=69.799, loss_att=51.631, acc=0.718, loss=57.082, backward_time=1.023, grad_norm=137.188, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.149e-05, train_time=2.705 +[gpub001:0/64] 2023-07-14 14:17:44,041 (trainer:732) INFO: 49epoch:train:1501-1600batch: iter_time=1.310e-04, forward_time=0.143, loss_ctc=62.191, loss_att=43.167, acc=0.720, loss=48.874, backward_time=1.024, grad_norm=110.624, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, 
optim0_lr0=5.149e-05, train_time=2.706 +[gpub001:0/64] 2023-07-14 14:19:28,411 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub001:0/64] 2023-07-14 14:19:46,106 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-14 14:19:49,464 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-14 14:19:49,464 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub001:0/64] 2023-07-14 14:19:49,518 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-14 14:24:53,336 (trainer:732) INFO: 49epoch:train:1601-1700batch: iter_time=2.398, forward_time=0.158, loss_ctc=86.888, loss_att=63.993, acc=0.702, loss=70.861, backward_time=1.039, grad_norm=144.375, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.148e-05, train_time=8.586 +[gpub001:0/64] 2023-07-14 14:27:11,824 (trainer:732) INFO: 49epoch:train:1701-1800batch: iter_time=1.060e-04, forward_time=0.144, loss_ctc=72.288, loss_att=55.796, acc=0.708, loss=60.743, backward_time=1.032, grad_norm=127.355, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.148e-05, train_time=2.770 +[gpub001:0/64] 2023-07-14 14:29:27,834 (trainer:732) INFO: 49epoch:train:1801-1900batch: iter_time=1.113e-04, forward_time=0.145, loss_ctc=72.461, loss_att=50.750, acc=0.721, loss=57.264, backward_time=1.026, grad_norm=124.107, clip=100.000, loss_scale=5.127e+32, optim_step_time=0.181, optim0_lr0=5.147e-05, train_time=2.720 +[gpub001:0/64] 2023-07-14 14:31:43,989 (trainer:732) INFO: 49epoch:train:1901-2000batch: iter_time=1.183e-04, forward_time=0.144, loss_ctc=77.931, loss_att=60.068, acc=0.711, loss=65.427, backward_time=1.028, grad_norm=134.506, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.180, optim0_lr0=5.147e-05, train_time=2.723 +[gpub001:0/64] 2023-07-14 14:34:00,002 (trainer:732) INFO: 49epoch:train:2001-2100batch: iter_time=1.221e-04, forward_time=0.144, loss_ctc=74.295, loss_att=58.950, acc=0.728, loss=63.554, backward_time=1.026, grad_norm=139.921, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.180, optim0_lr0=5.146e-05, train_time=2.720 +[gpub001:0/64] 2023-07-14 14:35:34,874 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. 
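The "grad norm is nan. Skipping updating the model" warning, together with the very large loss_scale values (3.245e+32 rising to 6.490e+32, then dropping to 5.497e+32 right after the skip), is the usual signature of mixed-precision training: the gradient scaler grows its scale until an overflow produces inf/nan gradients, the optimizer step is skipped, and the scale backs off. ESPnet's trainer implements its own variant of this check; a generic sketch of the same mechanism with torch.cuda.amp, under those assumptions:

    import torch

    device = "cuda" if torch.cuda.is_available() else "cpu"
    amp_dtype = torch.float16 if device == "cuda" else torch.bfloat16

    model = torch.nn.Linear(16, 16).to(device)
    opt = torch.optim.AdamW(model.parameters(), lr=1e-4)
    # The scaler multiplies the loss by loss_scale before backward; values like
    # 3.245e+32 in the log are this scale, not the loss itself.
    scaler = torch.cuda.amp.GradScaler(enabled=(device == "cuda"))

    x = torch.randn(4, 16, device=device)
    with torch.autocast(device_type=device, dtype=amp_dtype):
        loss = model(x).pow(2).mean()

    opt.zero_grad()
    scaler.scale(loss).backward()
    # step() unscales the gradients and checks them: if any is inf/nan the
    # optimizer step is skipped, and update() then lowers the scale -- the
    # "Skipping updating the model" case in the log.
    scaler.step(opt)
    scaler.update()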
+[gpub001:0/64] 2023-07-14 14:36:15,530 (trainer:732) INFO: 49epoch:train:2101-2200batch: iter_time=1.218e-04, forward_time=0.144, loss_ctc=66.147, loss_att=47.940, acc=0.731, loss=53.402, backward_time=1.025, grad_norm=127.949, clip=100.000, loss_scale=5.497e+32, optim_step_time=0.180, optim0_lr0=5.146e-05, train_time=2.710
+[gpub001:0/64] 2023-07-14 14:38:31,109 (trainer:732) INFO: 49epoch:train:2201-2300batch: iter_time=1.115e-04, forward_time=0.144, loss_ctc=63.495, loss_att=43.732, acc=0.732, loss=49.661, backward_time=1.025, grad_norm=107.717, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.145e-05, train_time=2.711
+[gpub001:0/64] 2023-07-14 14:40:46,863 (trainer:732) INFO: 49epoch:train:2301-2400batch: iter_time=1.134e-04, forward_time=0.144, loss_ctc=70.905, loss_att=53.186, acc=0.722, loss=58.502, backward_time=1.025, grad_norm=119.707, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.144e-05, train_time=2.715
+[gpub001:0/64] 2023-07-14 14:43:02,330 (trainer:732) INFO: 49epoch:train:2401-2500batch: iter_time=1.097e-04, forward_time=0.143, loss_ctc=71.274, loss_att=49.885, acc=0.724, loss=56.302, backward_time=1.024, grad_norm=140.068, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.144e-05, train_time=2.709
+[gpub001:0/64] 2023-07-14 14:43:03,512 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub001:0/64] 2023-07-14 14:43:21,506 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 14:43:24,979 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 14:43:24,979 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub001:0/64] 2023-07-14 14:43:24,985 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 14:50:14,455 (trainer:732) INFO: 49epoch:train:2501-2600batch: iter_time=1.236, forward_time=0.144, loss_ctc=77.943, loss_att=56.932, acc=0.709, loss=63.235, backward_time=1.048, grad_norm=215.635, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.143e-05, train_time=8.642
+[gpub001:0/64] 2023-07-14 14:52:30,604 (trainer:732) INFO: 49epoch:train:2601-2700batch: iter_time=1.298e-04, forward_time=0.144, loss_ctc=76.520, loss_att=56.178, acc=0.714, loss=62.281, backward_time=1.026, grad_norm=138.030, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.143e-05, train_time=2.723
+[gpub001:0/64] 2023-07-14 14:54:46,489 (trainer:732) INFO: 49epoch:train:2701-2800batch: iter_time=1.242e-04, forward_time=0.145, loss_ctc=70.928, loss_att=50.393, acc=0.725, loss=56.554, backward_time=1.025, grad_norm=113.963, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.142e-05, train_time=2.717
+[gpub001:0/64] 2023-07-14 14:57:02,630 (trainer:732) INFO: 49epoch:train:2801-2900batch: iter_time=1.260e-04, forward_time=0.145, loss_ctc=80.053, loss_att=64.710, acc=0.711, loss=69.313, backward_time=1.028, grad_norm=130.479, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.142e-05, train_time=2.723
+[gpub001:0/64] 2023-07-14 14:59:17,960 (trainer:732) INFO: 49epoch:train:2901-3000batch: iter_time=1.280e-04, forward_time=0.143, loss_ctc=67.583, loss_att=50.322, acc=0.734, loss=55.500, backward_time=1.023, grad_norm=118.809, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.141e-05, train_time=2.706
+[gpub001:0/64] 2023-07-14 15:01:33,622 (trainer:732) INFO: 49epoch:train:3001-3100batch: iter_time=1.303e-04, forward_time=0.146, loss_ctc=64.848, loss_att=44.323, acc=0.737, loss=50.480, backward_time=1.024, grad_norm=131.361, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.141e-05, train_time=2.713
+[gpub001:0/64] 2023-07-14 15:03:48,945 (trainer:732) INFO: 49epoch:train:3101-3200batch: iter_time=1.302e-04, forward_time=0.144, loss_ctc=68.493, loss_att=50.724, acc=0.731, loss=56.054, backward_time=1.022, grad_norm=139.131, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.140e-05, train_time=2.706
+[gpub001:0/64] 2023-07-14 15:06:04,272 (trainer:732) INFO: 49epoch:train:3201-3300batch: iter_time=1.350e-04, forward_time=0.144, loss_ctc=62.583, loss_att=44.592, acc=0.725, loss=49.990, backward_time=1.023, grad_norm=116.550, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.140e-05, train_time=2.706
+[gpub001:0/64] 2023-07-14 15:06:50,474 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub001:0/64] 2023-07-14 15:07:08,807 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 15:07:12,196 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 15:07:12,197 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub001:0/64] 2023-07-14 15:07:12,203 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 15:12:51,478 (trainer:732) INFO: 49epoch:train:3301-3400batch: iter_time=1.286, forward_time=0.144, loss_ctc=82.374, loss_att=58.429, acc=0.718, loss=65.612, backward_time=1.042, grad_norm=161.884, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.139e-05, train_time=8.144
+[gpub001:0/64] 2023-07-14 15:15:09,454 (trainer:732) INFO: 49epoch:train:3401-3500batch: iter_time=1.225e-04, forward_time=0.143, loss_ctc=72.824, loss_att=56.074, acc=0.705, loss=61.099, backward_time=1.027, grad_norm=124.071, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.138e-05, train_time=2.759
+[gpub001:0/64] 2023-07-14 15:16:18,542 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub001:0/64] 2023-07-14 15:17:26,359 (trainer:732) INFO: 49epoch:train:3501-3600batch: iter_time=1.224e-04, forward_time=0.144, loss_ctc=70.789, loss_att=51.064, acc=0.720, loss=56.982, backward_time=1.025, grad_norm=127.943, clip=100.000, loss_scale=2.417e+32, optim_step_time=0.180, optim0_lr0=5.138e-05, train_time=2.738
+[gpub001:0/64] 2023-07-14 15:19:44,415 (trainer:732) INFO: 49epoch:train:3601-3700batch: iter_time=1.162e-04, forward_time=0.144, loss_ctc=81.997, loss_att=65.961, acc=0.690, loss=70.771, backward_time=1.027, grad_norm=125.683, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.137e-05, train_time=2.761
+[gpub001:0/64] 2023-07-14 15:22:04,902 (trainer:732) INFO: 49epoch:train:3701-3800batch: iter_time=1.162e-04, forward_time=0.144, loss_ctc=68.446, loss_att=49.301, acc=0.730, loss=55.044, backward_time=1.028, grad_norm=115.877, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.137e-05, train_time=2.810
+[gpub001:0/64] 2023-07-14 15:24:23,287 (trainer:732) INFO: 49epoch:train:3801-3900batch: iter_time=1.214e-04, forward_time=0.144, loss_ctc=70.058, loss_att=52.734, acc=0.711, loss=57.931, backward_time=1.024, grad_norm=135.739, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.136e-05, train_time=2.767
+[gpub001:0/64] 2023-07-14 15:26:40,552 (trainer:732) INFO: 49epoch:train:3901-4000batch: iter_time=1.206e-04, forward_time=0.145, loss_ctc=63.341, loss_att=45.010, acc=0.729, loss=50.510, backward_time=1.027, grad_norm=115.671, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.136e-05, train_time=2.745
+[gpub001:0/64] 2023-07-14 15:28:56,692 (trainer:732) INFO: 49epoch:train:4001-4100batch: iter_time=1.112e-04, forward_time=0.145, loss_ctc=67.558, loss_att=49.060, acc=0.721, loss=54.609, backward_time=1.024, grad_norm=113.122, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.135e-05, train_time=2.723
+[gpub001:0/64] 2023-07-14 15:30:27,503 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub001:0/64] 2023-07-14 15:30:45,587 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 15:30:49,041 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 15:30:49,041 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpub001:0/64] 2023-07-14 15:30:49,047 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 15:35:52,270 (trainer:732) INFO: 49epoch:train:4101-4200batch: iter_time=1.261, forward_time=0.144, loss_ctc=70.221, loss_att=53.085, acc=0.717, loss=58.226, backward_time=1.035, grad_norm=140.521, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.135e-05, train_time=8.311
+[gpub001:0/64] 2023-07-14 15:38:08,843 (trainer:732) INFO: 49epoch:train:4201-4300batch: iter_time=1.161e-04, forward_time=0.144, loss_ctc=73.382, loss_att=57.566, acc=0.707, loss=62.311, backward_time=1.029, grad_norm=116.208, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.134e-05, train_time=2.731
+[gpub001:0/64] 2023-07-14 15:40:24,503 (trainer:732) INFO: 49epoch:train:4301-4400batch: iter_time=1.151e-04, forward_time=0.144, loss_ctc=72.437, loss_att=50.993, acc=0.718, loss=57.426, backward_time=1.025, grad_norm=135.729, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.134e-05, train_time=2.713
+[gpub001:0/64] 2023-07-14 15:42:39,948 (trainer:732) INFO: 49epoch:train:4401-4500batch: iter_time=1.171e-04, forward_time=0.144, loss_ctc=79.101, loss_att=66.083, acc=0.685, loss=69.989, backward_time=1.023, grad_norm=131.828, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.133e-05, train_time=2.709
+[gpub001:0/64] 2023-07-14 15:44:55,456 (trainer:732) INFO: 49epoch:train:4501-4600batch: iter_time=1.249e-04, forward_time=0.143, loss_ctc=67.710, loss_att=48.912, acc=0.733, loss=54.551, backward_time=1.023, grad_norm=151.097, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.133e-05, train_time=2.710
+[gpub001:0/64] 2023-07-14 15:47:10,473 (trainer:732) INFO: 49epoch:train:4601-4700batch: iter_time=1.201e-04, forward_time=0.142, loss_ctc=67.960, loss_att=47.417, acc=0.714, loss=53.580, backward_time=1.021, grad_norm=139.328, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.132e-05, train_time=2.700
+[gpub001:0/64] 2023-07-14 15:49:28,348 (trainer:732) INFO: 49epoch:train:4701-4800batch: iter_time=1.428e-04, forward_time=0.143, loss_ctc=66.335, loss_att=46.579, acc=0.730, loss=52.506, backward_time=1.025, grad_norm=121.960, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.131e-05, train_time=2.757
+[gpub001:0/64] 2023-07-14 15:51:43,745 (trainer:732) INFO: 49epoch:train:4801-4900batch: iter_time=1.151e-04, forward_time=0.144, loss_ctc=65.075, loss_att=47.591, acc=0.717, loss=52.836, backward_time=1.024, grad_norm=142.515, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.131e-05, train_time=2.708
+[gpub001:0/64] 2023-07-14 15:53:59,339 (trainer:732) INFO: 49epoch:train:4901-5000batch: iter_time=1.186e-04, forward_time=0.144, loss_ctc=76.840, loss_att=55.520, acc=0.720, loss=61.916, backward_time=1.025, grad_norm=146.307, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.130e-05, train_time=2.712
+[gpub001:0/64] 2023-07-14 15:54:01,002 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub001:0/64] 2023-07-14 15:54:18,910 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 15:54:22,671 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 15:54:22,671 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpub001:0/64] 2023-07-14 15:54:22,677 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 16:02:02,572 (trainer:732) INFO: 49epoch:train:5001-5100batch: iter_time=1.246, forward_time=0.173, loss_ctc=70.620, loss_att=55.070, acc=0.710, loss=59.735, backward_time=1.102, grad_norm=143.891, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=5.130e-05, train_time=9.664
+[gpub001:0/64] 2023-07-14 16:05:05,429 (trainer:732) INFO: 49epoch:train:5101-5200batch: iter_time=1.234e-04, forward_time=0.144, loss_ctc=74.968, loss_att=53.708, acc=0.725, loss=60.086, backward_time=1.111, grad_norm=140.062, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.129e-05, train_time=3.657
+[gpub001:0/64] 2023-07-14 16:07:39,205 (trainer:732) INFO: 49epoch:train:5201-5300batch: iter_time=1.230e-04, forward_time=0.143, loss_ctc=78.360, loss_att=62.863, acc=0.701, loss=67.512, backward_time=1.045, grad_norm=129.656, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.129e-05, train_time=3.075
+[gpub001:0/64] 2023-07-14 16:10:30,819 (trainer:732) INFO: 49epoch:train:5301-5400batch: iter_time=1.198e-04, forward_time=0.143, loss_ctc=70.380, loss_att=52.634, acc=0.739, loss=57.958, backward_time=1.065, grad_norm=145.551, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.179, optim0_lr0=5.128e-05, train_time=3.432
+[gpub001:0/64] 2023-07-14 16:13:06,699 (trainer:732) INFO: 49epoch:train:5401-5500batch: iter_time=1.248e-04, forward_time=0.144, loss_ctc=66.159, loss_att=49.037, acc=0.727, loss=54.174, backward_time=1.050, grad_norm=127.077, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.128e-05, train_time=3.117
+[gpub001:0/64] 2023-07-14 16:15:41,679 (trainer:732) INFO: 49epoch:train:5501-5600batch: iter_time=1.175e-04, forward_time=0.143, loss_ctc=63.480, loss_att=42.575, acc=0.741, loss=48.847, backward_time=1.045, grad_norm=114.198, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.127e-05, train_time=3.099
+[gpub001:0/64] 2023-07-14 16:18:02,778 (trainer:732) INFO: 49epoch:train:5601-5700batch: iter_time=1.207e-04, forward_time=0.143, loss_ctc=71.660, loss_att=53.814, acc=0.721, loss=59.168, backward_time=1.031, grad_norm=134.301, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.127e-05, train_time=2.822
+[gpub001:0/64] 2023-07-14 16:20:31,593 (trainer:732) INFO: 49epoch:train:5701-5800batch: iter_time=1.171e-04, forward_time=0.144, loss_ctc=71.301, loss_att=52.766, acc=0.725, loss=58.326, backward_time=1.041, grad_norm=137.471, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.126e-05, train_time=2.976
+[gpub001:0/64] 2023-07-14 16:21:30,492 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub001:0/64] 2023-07-14 16:21:48,619 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 16:21:52,043 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 16:21:52,043 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub001:0/64] 2023-07-14 16:21:52,049 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 16:27:57,645 (trainer:732) INFO: 49epoch:train:5801-5900batch: iter_time=1.522, forward_time=0.161, loss_ctc=73.115, loss_att=49.116, acc=0.724, loss=56.316, backward_time=1.040, grad_norm=136.046, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.126e-05, train_time=8.921
+[gpub001:0/64] 2023-07-14 16:30:13,954 (trainer:732) INFO: 49epoch:train:5901-6000batch: iter_time=1.296e-04, forward_time=0.144, loss_ctc=71.267, loss_att=55.700, acc=0.705, loss=60.370, backward_time=1.026, grad_norm=123.436, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.125e-05, train_time=2.726
+[gpub001:0/64] 2023-07-14 16:32:30,026 (trainer:732) INFO: 49epoch:train:6001-6100batch: iter_time=1.304e-04, forward_time=0.144, loss_ctc=70.944, loss_att=50.849, acc=0.722, loss=56.877, backward_time=1.022, grad_norm=205.768, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.124e-05, train_time=2.721
+[gpub001:0/64] 2023-07-14 16:34:45,468 (trainer:732) INFO: 49epoch:train:6101-6200batch: iter_time=1.363e-04, forward_time=0.144, loss_ctc=80.848, loss_att=65.774, acc=0.690, loss=70.296, backward_time=1.024, grad_norm=189.035, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.124e-05, train_time=2.709
+[gpub001:0/64] 2023-07-14 16:37:00,897 (trainer:732) INFO: 49epoch:train:6201-6300batch: iter_time=1.565e-04, forward_time=0.144, loss_ctc=68.282, loss_att=48.857, acc=0.734, loss=54.685, backward_time=1.025, grad_norm=112.677, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.123e-05, train_time=2.708
+[gpub001:0/64] 2023-07-14 16:39:16,422 (trainer:732) INFO: 49epoch:train:6301-6400batch: iter_time=1.438e-04, forward_time=0.144, loss_ctc=69.857, loss_att=51.306, acc=0.719, loss=56.871, backward_time=1.024, grad_norm=136.182, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.123e-05, train_time=2.710
+[gpub001:0/64] 2023-07-14 16:41:31,597 (trainer:732) INFO: 49epoch:train:6401-6500batch: iter_time=1.378e-04, forward_time=0.144, loss_ctc=62.547, loss_att=44.512, acc=0.727, loss=49.923, backward_time=1.022, grad_norm=115.827, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.122e-05, train_time=2.703
+[gpub001:0/64] 2023-07-14 16:43:46,948 (trainer:732) INFO: 49epoch:train:6501-6600batch: iter_time=1.210e-04, forward_time=0.145, loss_ctc=68.154, loss_att=48.823, acc=0.723, loss=54.622, backward_time=1.024, grad_norm=127.783, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.122e-05, train_time=2.707
+[gpub001:0/64] 2023-07-14 16:45:29,079 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub001:0/64] 2023-07-14 16:45:47,237 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 16:45:50,693 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 16:45:50,693 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub001:0/64] 2023-07-14 16:45:50,699 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 16:49:39,620 (trainer:732) INFO: 49epoch:train:6601-6700batch: iter_time=2.079, forward_time=0.183, loss_ctc=76.672, loss_att=56.369, acc=0.715, loss=62.460, backward_time=1.034, grad_norm=132.969, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=5.121e-05, train_time=7.053
+[gpub001:0/64] 2023-07-14 16:51:56,314 (trainer:732) INFO: 49epoch:train:6701-6800batch: iter_time=1.109e-04, forward_time=0.145, loss_ctc=70.489, loss_att=56.214, acc=0.716, loss=60.496, backward_time=1.027, grad_norm=152.693, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.121e-05, train_time=2.734
+[gpub001:0/64] 2023-07-14 16:54:12,308 (trainer:732) INFO: 49epoch:train:6801-6900batch: iter_time=1.133e-04, forward_time=0.143, loss_ctc=71.773, loss_att=49.137, acc=0.730, loss=55.928, backward_time=1.027, grad_norm=116.515, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.120e-05, train_time=2.720
+[gpub001:0/64] 2023-07-14 16:56:28,064 (trainer:732) INFO: 49epoch:train:6901-7000batch: iter_time=1.082e-04, forward_time=0.145, loss_ctc=76.239, loss_att=59.077, acc=0.716, loss=64.225, backward_time=1.026, grad_norm=122.696, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.120e-05, train_time=2.715
+[gpub001:0/64] 2023-07-14 16:58:43,796 (trainer:732) INFO: 49epoch:train:7001-7100batch: iter_time=1.142e-04, forward_time=0.144, loss_ctc=73.758, loss_att=58.472, acc=0.731, loss=63.058, backward_time=1.026, grad_norm=119.532, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.119e-05, train_time=2.714
+[gpub001:0/64] 2023-07-14 17:00:59,084 (trainer:732) INFO: 49epoch:train:7101-7200batch: iter_time=1.003e-04, forward_time=0.142, loss_ctc=67.069, loss_att=48.465, acc=0.731, loss=54.046, backward_time=1.023, grad_norm=124.919, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.119e-05, train_time=2.706
+[gpub001:0/64] 2023-07-14 17:03:14,636 (trainer:732) INFO: 49epoch:train:7201-7300batch: iter_time=9.950e-05, forward_time=0.144, loss_ctc=63.394, loss_att=43.420, acc=0.735, loss=49.412, backward_time=1.024, grad_norm=118.629, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.179, optim0_lr0=5.118e-05, train_time=2.711
+[gpub001:0/64] 2023-07-14 17:05:30,294 (trainer:732) INFO: 49epoch:train:7301-7400batch: iter_time=1.066e-04, forward_time=0.144, loss_ctc=70.573, loss_att=52.037, acc=0.731, loss=57.598, backward_time=1.025, grad_norm=138.691, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.117e-05, train_time=2.713
+[gpub001:0/64] 2023-07-14 17:07:45,765 (trainer:732) INFO: 49epoch:train:7401-7500batch: iter_time=9.254e-05, forward_time=0.144, loss_ctc=67.756, loss_att=48.815, acc=0.728, loss=54.498, backward_time=1.025, grad_norm=152.362, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.180, optim0_lr0=5.117e-05, train_time=2.709
+[gpub001:0/64] 2023-07-14 17:07:47,423 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub001:0/64] 2023-07-14 17:08:05,615 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 17:08:09,029 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 17:08:09,029 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpub001:0/64] 2023-07-14 17:08:09,035 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 17:15:00,221 (trainer:732) INFO: 49epoch:train:7501-7600batch: iter_time=1.254, forward_time=0.144, loss_ctc=73.840, loss_att=54.828, acc=0.713, loss=60.532, backward_time=1.035, grad_norm=126.378, clip=100.000, loss_scale=2.434e+32, optim_step_time=0.180, optim0_lr0=5.116e-05, train_time=8.689
+[gpub001:0/64] 2023-07-14 17:17:16,603 (trainer:732) INFO: 49epoch:train:7601-7700batch: iter_time=1.198e-04, forward_time=0.144, loss_ctc=73.851, loss_att=54.824, acc=0.712, loss=60.532, backward_time=1.027, grad_norm=149.921, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.116e-05, train_time=2.727
+[gpub001:0/64] 2023-07-14 17:19:32,240 (trainer:732) INFO: 49epoch:train:7701-7800batch: iter_time=1.247e-04, forward_time=0.145, loss_ctc=69.949, loss_att=52.432, acc=0.715, loss=57.687, backward_time=1.024, grad_norm=117.932, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.115e-05, train_time=2.712
+[gpub001:0/64] 2023-07-14 17:21:47,986 (trainer:732) INFO: 49epoch:train:7801-7900batch: iter_time=1.104e-04, forward_time=0.145, loss_ctc=80.270, loss_att=64.561, acc=0.698, loss=69.274, backward_time=1.027, grad_norm=140.177, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.115e-05, train_time=2.715
+[gpub001:0/64] 2023-07-14 17:24:03,451 (trainer:732) INFO: 49epoch:train:7901-8000batch: iter_time=1.315e-04, forward_time=0.145, loss_ctc=65.982, loss_att=48.615, acc=0.731, loss=53.825, backward_time=1.024, grad_norm=143.731, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.114e-05, train_time=2.709
+[gpub001:0/64] 2023-07-14 17:26:18,815 (trainer:732) INFO: 49epoch:train:8001-8100batch: iter_time=1.331e-04, forward_time=0.145, loss_ctc=63.870, loss_att=44.066, acc=0.731, loss=50.007, backward_time=1.023, grad_norm=124.882, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.114e-05, train_time=2.707
+[gpub001:0/64] 2023-07-14 17:28:34,027 (trainer:732) INFO: 49epoch:train:8101-8200batch: iter_time=1.181e-04, forward_time=0.144, loss_ctc=68.490, loss_att=50.287, acc=0.723, loss=55.748, backward_time=1.021, grad_norm=116.591, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.113e-05, train_time=2.704
+[gpub001:0/64] 2023-07-14 17:30:49,344 (trainer:732) INFO: 49epoch:train:8201-8300batch: iter_time=1.236e-04, forward_time=0.145, loss_ctc=60.268, loss_att=42.861, acc=0.725, loss=48.083, backward_time=1.021, grad_norm=105.963, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.113e-05, train_time=2.706
+[gpub001:0/64] 2023-07-14 17:31:35,992 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub001:0/64] 2023-07-14 17:31:53,991 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 17:31:57,440 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 17:31:57,440 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub001:0/64] 2023-07-14 17:31:57,446 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 17:38:13,424 (trainer:732) INFO: 49epoch:train:8301-8400batch: iter_time=1.214, forward_time=0.154, loss_ctc=81.413, loss_att=59.604, acc=0.719, loss=66.147, backward_time=1.043, grad_norm=156.011, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.112e-05, train_time=8.881
+[gpub001:0/64] 2023-07-14 17:40:30,027 (trainer:732) INFO: 49epoch:train:8401-8500batch: iter_time=1.238e-04, forward_time=0.145, loss_ctc=72.136, loss_att=55.373, acc=0.718, loss=60.402, backward_time=1.026, grad_norm=133.755, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.112e-05, train_time=2.732
+[gpub001:0/64] 2023-07-14 17:42:45,754 (trainer:732) INFO: 49epoch:train:8501-8600batch: iter_time=1.202e-04, forward_time=0.145, loss_ctc=70.343, loss_att=50.701, acc=0.727, loss=56.593, backward_time=1.028, grad_norm=121.862, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.111e-05, train_time=2.714
+[gpub001:0/64] 2023-07-14 17:45:01,816 (trainer:732) INFO: 49epoch:train:8601-8700batch: iter_time=1.256e-04, forward_time=0.146, loss_ctc=80.448, loss_att=64.353, acc=0.708, loss=69.182, backward_time=1.028, grad_norm=135.963, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.111e-05, train_time=2.721
+[gpub001:0/64] 2023-07-14 17:47:20,645 (trainer:732) INFO: 49epoch:train:8701-8800batch: iter_time=1.180e-04, forward_time=0.145, loss_ctc=67.178, loss_att=48.931, acc=0.743, loss=54.405, backward_time=1.028, grad_norm=142.495, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.110e-05, train_time=2.776
+[gpub001:0/64] 2023-07-14 17:49:37,238 (trainer:732) INFO: 49epoch:train:8801-8900batch: iter_time=1.296e-04, forward_time=0.146, loss_ctc=68.598, loss_att=50.099, acc=0.731, loss=55.649, backward_time=1.027, grad_norm=124.003, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.109e-05, train_time=2.732
+[gpub001:0/64] 2023-07-14 17:51:56,378 (trainer:732) INFO: 49epoch:train:8901-9000batch: iter_time=1.274e-04, forward_time=0.145, loss_ctc=62.040, loss_att=43.423, acc=0.740, loss=49.008, backward_time=1.025, grad_norm=114.553, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.109e-05, train_time=2.783
+[gpub001:0/64] 2023-07-14 17:54:11,781 (trainer:732) INFO: 49epoch:train:9001-9100batch: iter_time=1.150e-04, forward_time=0.144, loss_ctc=67.395, loss_att=49.894, acc=0.729, loss=55.144, backward_time=1.022, grad_norm=128.721, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.108e-05, train_time=2.708
+[gpub001:0/64] 2023-07-14 17:56:00,273 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub001:0/64] 2023-07-14 17:56:18,139 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 17:56:21,608 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 17:56:21,608 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub001:0/64] 2023-07-14 17:56:21,615 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 18:01:26,368 (trainer:732) INFO: 49epoch:train:9101-9200batch: iter_time=1.617, forward_time=0.164, loss_ctc=69.323, loss_att=50.650, acc=0.726, loss=56.252, backward_time=1.035, grad_norm=140.311, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.108e-05, train_time=8.692
+[gpub001:0/64] 2023-07-14 18:03:44,759 (trainer:732) INFO: 49epoch:train:9201-9300batch: iter_time=1.228e-04, forward_time=0.147, loss_ctc=73.686, loss_att=57.660, acc=0.717, loss=62.468, backward_time=1.032, grad_norm=120.818, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.107e-05, train_time=2.768
+[gpub001:0/64] 2023-07-14 18:06:01,753 (trainer:732) INFO: 49epoch:train:9301-9400batch: iter_time=1.486e-04, forward_time=0.145, loss_ctc=71.589, loss_att=48.903, acc=0.728, loss=55.709, backward_time=1.025, grad_norm=119.501, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.107e-05, train_time=2.740
+[gpub001:0/64] 2023-07-14 18:08:19,922 (trainer:732) INFO: 49epoch:train:9401-9500batch: iter_time=1.301e-04, forward_time=0.144, loss_ctc=80.193, loss_att=65.513, acc=0.702, loss=69.917, backward_time=1.031, grad_norm=140.511, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.106e-05, train_time=2.763
+[gpub001:0/64] 2023-07-14 18:10:37,699 (trainer:732) INFO: 49epoch:train:9501-9600batch: iter_time=1.212e-04, forward_time=0.144, loss_ctc=67.328, loss_att=48.700, acc=0.744, loss=54.289, backward_time=1.027, grad_norm=128.383, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.106e-05, train_time=2.755
+[gpub001:0/64] 2023-07-14 18:12:55,391 (trainer:732) INFO: 49epoch:train:9601-9700batch: iter_time=1.316e-04, forward_time=0.143, loss_ctc=65.278, loss_att=46.806, acc=0.726, loss=52.347, backward_time=1.026, grad_norm=135.251, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.105e-05, train_time=2.754
+[gpub001:0/64] 2023-07-14 18:15:15,142 (trainer:732) INFO: 49epoch:train:9701-9800batch: iter_time=1.367e-04, forward_time=0.145, loss_ctc=65.510, loss_att=45.641, acc=0.741, loss=51.602, backward_time=1.034, grad_norm=115.004, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.105e-05, train_time=2.795
+[gpub001:0/64] 2023-07-14 18:17:33,807 (trainer:732) INFO: 49epoch:train:9801-9900batch: iter_time=1.295e-04, forward_time=0.144, loss_ctc=64.369, loss_att=47.790, acc=0.723, loss=52.764, backward_time=1.027, grad_norm=108.677, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.104e-05, train_time=2.773
+[gpub001:0/64] 2023-07-14 18:19:49,329 (trainer:732) INFO: 49epoch:train:9901-10000batch: iter_time=1.004e-04, forward_time=0.144, loss_ctc=76.087, loss_att=54.280, acc=0.724, loss=60.822, backward_time=1.024, grad_norm=120.371, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.104e-05, train_time=2.710
+[gpub001:0/64] 2023-07-14 18:33:02,493 (trainer:338) INFO: 49epoch results: [train] iter_time=0.177, forward_time=0.146, loss_ctc=71.210, loss_att=52.672, acc=0.719, loss=58.234, backward_time=1.030, grad_norm=131.679, clip=100.000, loss_scale=2.702e+32, optim_step_time=0.180, optim0_lr0=5.130e-05, train_time=3.462, time=4 hours, 48 minutes and 48.83 seconds, total_count=460000, gpu_max_cached_mem_GB=34.336, [valid] loss_ctc=43.418, cer_ctc=0.254, loss_att=37.707, acc=0.674, cer=0.423, wer=0.998, loss=39.421, time=7 minutes and 3.51 seconds, total_count=47058, gpu_max_cached_mem_GB=37.631, [att_plot] time=5 minutes and 53.56 seconds, total_count=0, gpu_max_cached_mem_GB=37.631
+[gpub001:0/64] 2023-07-14 18:33:18,474 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub001:0/64] 2023-07-14 18:33:18,484 (trainer:272) INFO: 50/60epoch started. Estimated time to finish: 2 days, 7 hours and 22 minutes
+[gpub001:0/64] 2023-07-14 18:33:18,487 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub001:0/64] 2023-07-14 18:33:35,824 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 18:33:39,090 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 18:33:39,090 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub001:0/64] 2023-07-14 18:33:39,096 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 18:37:48,677 (trainer:732) INFO: 50epoch:train:1-100batch: iter_time=1.165, forward_time=0.188, loss_ctc=76.860, loss_att=56.022, acc=0.704, loss=62.273, backward_time=1.063, grad_norm=182.532, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.187, optim0_lr0=5.103e-05, train_time=5.403
+[gpub001:0/64] 2023-07-14 18:40:12,390 (trainer:732) INFO: 50epoch:train:101-200batch: iter_time=9.433e-05, forward_time=0.179, loss_ctc=63.633, loss_att=45.042, acc=0.731, loss=50.619, backward_time=1.036, grad_norm=138.710, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.103e-05, train_time=2.872
+[gpub001:0/64] 2023-07-14 18:42:28,865 (trainer:732) INFO: 50epoch:train:201-300batch: iter_time=9.529e-05, forward_time=0.145, loss_ctc=66.674, loss_att=52.287, acc=0.720, loss=56.603, backward_time=1.031, grad_norm=131.160, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.102e-05, train_time=2.731
+[gpub001:0/64] 2023-07-14 18:44:55,125 (trainer:732) INFO: 50epoch:train:301-400batch: iter_time=9.373e-05, forward_time=0.144, loss_ctc=70.708, loss_att=47.914, acc=0.725, loss=54.752, backward_time=1.038, grad_norm=128.284, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.101e-05, train_time=2.925
+[gpub001:0/64] 2023-07-14 18:47:16,114 (trainer:732) INFO: 50epoch:train:401-500batch: iter_time=9.543e-05, forward_time=0.145, loss_ctc=76.756, loss_att=56.886, acc=0.714, loss=62.847, backward_time=1.034, grad_norm=131.145, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.101e-05, train_time=2.820
+[gpub001:0/64] 2023-07-14 18:49:35,575 (trainer:732) INFO: 50epoch:train:501-600batch: iter_time=9.838e-05, forward_time=0.144, loss_ctc=66.678, loss_att=50.737, acc=0.720, loss=55.519, backward_time=1.034, grad_norm=115.394, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.100e-05, train_time=2.789
+[gpub001:0/64] 2023-07-14 18:52:04,139 (trainer:732) INFO: 50epoch:train:601-700batch: iter_time=9.636e-05, forward_time=0.157, loss_ctc=66.833, loss_att=48.220, acc=0.716, loss=53.804, backward_time=1.050, grad_norm=122.552, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.100e-05, train_time=2.971
+[gpub001:0/64] 2023-07-14 18:54:29,918 (trainer:732) INFO: 50epoch:train:701-800batch: iter_time=9.484e-05, forward_time=0.144, loss_ctc=71.313, loss_att=46.886, acc=0.720, loss=54.214, backward_time=1.043, grad_norm=121.906, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.099e-05, train_time=2.915
+[gpub001:0/64] 2023-07-14 18:55:23,066 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpub001:0/64] 2023-07-14 18:55:40,513 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 18:55:43,867 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 18:55:43,867 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub001:0/64] 2023-07-14 18:55:43,874 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 19:00:29,306 (trainer:732) INFO: 50epoch:train:801-900batch: iter_time=1.702, forward_time=0.164, loss_ctc=74.067, loss_att=54.822, acc=0.707, loss=60.595, backward_time=1.045, grad_norm=162.733, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.099e-05, train_time=7.188
+[gpub001:0/64] 2023-07-14 19:02:46,018 (trainer:732) INFO: 50epoch:train:901-1000batch: iter_time=1.192e-04, forward_time=0.144, loss_ctc=63.885, loss_att=44.598, acc=0.731, loss=50.384, backward_time=1.029, grad_norm=110.262, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.098e-05, train_time=2.734
+[gpub001:0/64] 2023-07-14 19:05:02,005 (trainer:732) INFO: 50epoch:train:1001-1100batch: iter_time=1.114e-04, forward_time=0.145, loss_ctc=65.165, loss_att=50.965, acc=0.731, loss=55.225, backward_time=1.029, grad_norm=115.952, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.098e-05, train_time=2.720
+[gpub001:0/64] 2023-07-14 19:07:17,905 (trainer:732) INFO: 50epoch:train:1101-1200batch: iter_time=1.223e-04, forward_time=0.144, loss_ctc=72.477, loss_att=48.920, acc=0.727, loss=55.987, backward_time=1.029, grad_norm=135.909, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.097e-05, train_time=2.718
+[gpub001:0/64] 2023-07-14 19:09:33,663 (trainer:732) INFO: 50epoch:train:1201-1300batch: iter_time=1.099e-04, forward_time=0.144, loss_ctc=76.221, loss_att=57.505, acc=0.709, loss=63.120, backward_time=1.029, grad_norm=152.617, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.097e-05, train_time=2.715
+[gpub001:0/64] 2023-07-14 19:11:52,890 (trainer:732) INFO: 50epoch:train:1301-1400batch: iter_time=1.119e-04, forward_time=0.145, loss_ctc=64.913, loss_att=49.074, acc=0.717, loss=53.826, backward_time=1.032, grad_norm=143.678, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.096e-05, train_time=2.784
+[gpub001:0/64] 2023-07-14 19:14:08,777 (trainer:732) INFO: 50epoch:train:1401-1500batch: iter_time=1.097e-04, forward_time=0.145, loss_ctc=64.207, loss_att=46.300, acc=0.727, loss=51.672, backward_time=1.028, grad_norm=96.777, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.096e-05, train_time=2.718
+[gpub001:0/64] 2023-07-14 19:16:24,490 (trainer:732) INFO: 50epoch:train:1501-1600batch: iter_time=1.088e-04, forward_time=0.144, loss_ctc=72.579, loss_att=48.346, acc=0.712, loss=55.616, backward_time=1.026, grad_norm=117.818, clip=100.000, loss_scale=4.868e+32, optim_step_time=0.181, optim0_lr0=5.095e-05, train_time=2.714
+[gpub001:0/64] 2023-07-14 19:18:06,491 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub001:0/64] 2023-07-14 19:18:24,593 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 19:18:28,023 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 19:18:28,023 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpub001:0/64] 2023-07-14 19:18:28,029 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 19:21:27,486 (trainer:732) INFO: 50epoch:train:1601-1700batch: iter_time=1.532, forward_time=0.145, loss_ctc=76.069, loss_att=57.854, acc=0.715, loss=63.318, backward_time=1.042, grad_norm=120.403, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=5.095e-05, train_time=6.060
+[gpub001:0/64] 2023-07-14 19:23:44,660 (trainer:732) INFO: 50epoch:train:1701-1800batch: iter_time=1.012e-04, forward_time=0.145, loss_ctc=69.245, loss_att=50.311, acc=0.700, loss=55.991, backward_time=1.031, grad_norm=146.320, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=5.094e-05, train_time=2.743
+[gpub001:0/64] 2023-07-14 19:26:00,439 (trainer:732) INFO: 50epoch:train:1801-1900batch: iter_time=1.160e-04, forward_time=0.144, loss_ctc=65.293, loss_att=47.043, acc=0.727, loss=52.518, backward_time=1.025, grad_norm=118.987, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=5.094e-05, train_time=2.715
+[gpub001:0/64] 2023-07-14 19:28:15,850 (trainer:732) INFO: 50epoch:train:1901-2000batch: iter_time=1.534e-04, forward_time=0.146, loss_ctc=64.296, loss_att=50.158, acc=0.720, loss=54.399, backward_time=1.025, grad_norm=125.395, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.093e-05, train_time=2.708
+[gpub001:0/64] 2023-07-14 19:30:31,788 (trainer:732) INFO: 50epoch:train:2001-2100batch: iter_time=1.528e-04, forward_time=0.147, loss_ctc=72.538, loss_att=52.328, acc=0.704, loss=58.391, backward_time=1.030, grad_norm=133.928, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.092e-05, train_time=2.719
+[gpub001:0/64] 2023-07-14 19:32:47,884 (trainer:732) INFO: 50epoch:train:2101-2200batch: iter_time=1.222e-04, forward_time=0.147, loss_ctc=73.571, loss_att=55.147, acc=0.713, loss=60.674, backward_time=1.031, grad_norm=116.254, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.092e-05, train_time=2.722
+[gpub001:0/64] 2023-07-14 19:35:03,746 (trainer:732) INFO: 50epoch:train:2201-2300batch: iter_time=1.165e-04, forward_time=0.146, loss_ctc=66.832, loss_att=48.835, acc=0.707, loss=54.234, backward_time=1.029, grad_norm=126.747, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.091e-05, train_time=2.717
+[gpub001:0/64] 2023-07-14 19:37:19,158 (trainer:732) INFO: 50epoch:train:2301-2400batch: iter_time=1.330e-04, forward_time=0.146, loss_ctc=69.922, loss_att=45.724, acc=0.719, loss=52.983, backward_time=1.027, grad_norm=121.655, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.091e-05, train_time=2.708
+[gpub001:0/64] 2023-07-14 19:39:35,125 (trainer:732) INFO: 50epoch:train:2401-2500batch: iter_time=1.500e-04, forward_time=0.147, loss_ctc=69.057, loss_att=51.498, acc=0.713, loss=56.766, backward_time=1.030, grad_norm=136.557, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.090e-05, train_time=2.719
+[gpub001:0/64] 2023-07-14 19:39:36,493 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub001:0/64] 2023-07-14 19:39:54,855 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 19:39:58,286 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 19:39:58,286 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub001:0/64] 2023-07-14 19:39:58,292 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 19:47:03,128 (trainer:732) INFO: 50epoch:train:2501-2600batch: iter_time=1.229, forward_time=0.146, loss_ctc=75.319, loss_att=54.368, acc=0.702, loss=60.653, backward_time=1.044, grad_norm=145.226, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.090e-05, train_time=8.960
+[gpub001:0/64] 2023-07-14 19:49:19,629 (trainer:732) INFO: 50epoch:train:2601-2700batch: iter_time=1.033e-04, forward_time=0.147, loss_ctc=62.652, loss_att=44.316, acc=0.729, loss=49.817, backward_time=1.029, grad_norm=163.995, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=5.089e-05, train_time=2.730
+[gpub001:0/64] 2023-07-14 19:51:35,460 (trainer:732) INFO: 50epoch:train:2701-2800batch: iter_time=1.076e-04, forward_time=0.145, loss_ctc=65.546, loss_att=51.752, acc=0.720, loss=55.890, backward_time=1.028, grad_norm=116.187, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=5.089e-05, train_time=2.716
+[gpub001:0/64] 2023-07-14 19:53:51,137 (trainer:732) INFO: 50epoch:train:2801-2900batch: iter_time=1.003e-04, forward_time=0.145, loss_ctc=69.677, loss_att=46.545, acc=0.724, loss=53.485, backward_time=1.027, grad_norm=151.273, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=5.088e-05, train_time=2.713
+[gpub001:0/64] 2023-07-14 19:55:55,895 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub001:0/64] 2023-07-14 19:56:06,946 (trainer:732) INFO: 50epoch:train:2901-3000batch: iter_time=1.015e-04, forward_time=0.145, loss_ctc=75.469, loss_att=56.637, acc=0.712, loss=62.287, backward_time=1.028, grad_norm=123.542, clip=100.000, loss_scale=6.225e+32, optim_step_time=0.181, optim0_lr0=5.088e-05, train_time=2.716
+[gpub001:0/64] 2023-07-14 19:58:23,795 (trainer:732) INFO: 50epoch:train:3001-3100batch: iter_time=9.923e-05, forward_time=0.145, loss_ctc=64.660, loss_att=50.088, acc=0.711, loss=54.459, backward_time=1.029, grad_norm=139.049, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.087e-05, train_time=2.737
+[gpub001:0/64] 2023-07-14 20:00:39,421 (trainer:732) INFO: 50epoch:train:3101-3200batch: iter_time=1.047e-04, forward_time=0.144, loss_ctc=65.956, loss_att=46.842, acc=0.716, loss=52.576, backward_time=1.026, grad_norm=112.318, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.087e-05, train_time=2.712
+[gpub001:0/64] 2023-07-14 20:02:55,138 (trainer:732) INFO: 50epoch:train:3201-3300batch: iter_time=1.221e-04, forward_time=0.146, loss_ctc=69.096, loss_att=46.225, acc=0.715, loss=53.086, backward_time=1.028, grad_norm=123.518, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.086e-05, train_time=2.714
+[gpub001:0/64] 2023-07-14 20:03:41,171 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub001:0/64] 2023-07-14 20:03:59,687 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 20:04:03,127 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 20:04:03,127 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub001:0/64] 2023-07-14 20:04:03,133 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 20:10:00,535 (trainer:732) INFO: 50epoch:train:3301-3400batch: iter_time=1.229, forward_time=0.207, loss_ctc=76.427, loss_att=55.751, acc=0.711, loss=61.954, backward_time=1.042, grad_norm=113.736, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.086e-05, train_time=8.507
+[gpub001:0/64] 2023-07-14 20:12:16,912 (trainer:732) INFO: 50epoch:train:3401-3500batch: iter_time=1.159e-04, forward_time=0.146, loss_ctc=67.057, loss_att=48.763, acc=0.708, loss=54.251, backward_time=1.030, grad_norm=130.479, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.085e-05, train_time=2.728
+[gpub001:0/64] 2023-07-14 20:14:32,971 (trainer:732) INFO: 50epoch:train:3501-3600batch: iter_time=1.144e-04, forward_time=0.145, loss_ctc=67.118, loss_att=49.572, acc=0.728, loss=54.835, backward_time=1.028, grad_norm=137.657, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.085e-05, train_time=2.721
+[gpub001:0/64] 2023-07-14 20:16:49,816 (trainer:732) INFO: 50epoch:train:3601-3700batch: iter_time=1.234e-04, forward_time=0.145, loss_ctc=65.072, loss_att=47.251, acc=0.722, loss=52.598, backward_time=1.026, grad_norm=124.680, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.084e-05, train_time=2.737
+[gpub001:0/64] 2023-07-14 20:19:06,520 (trainer:732) INFO: 50epoch:train:3701-3800batch: iter_time=1.213e-04, forward_time=0.145, loss_ctc=68.645, loss_att=49.960, acc=0.716, loss=55.566, backward_time=1.026, grad_norm=119.968, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.084e-05, train_time=2.734
+[gpub001:0/64] 2023-07-14 20:21:22,393 (trainer:732) INFO: 50epoch:train:3801-3900batch: iter_time=1.198e-04, forward_time=0.147, loss_ctc=71.225, loss_att=52.976, acc=0.716, loss=58.451, backward_time=1.027, grad_norm=125.839, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.083e-05, train_time=2.717
+[gpub001:0/64] 2023-07-14 20:23:38,267 (trainer:732) INFO: 50epoch:train:3901-4000batch: iter_time=1.194e-04, forward_time=0.146, loss_ctc=67.296, loss_att=49.609, acc=0.708, loss=54.915, backward_time=1.027, grad_norm=122.521, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.082e-05, train_time=2.717
+[gpub001:0/64] 2023-07-14 20:25:53,904 (trainer:732) INFO: 50epoch:train:4001-4100batch: iter_time=1.199e-04, forward_time=0.146, loss_ctc=68.722, loss_att=44.612, acc=0.725, loss=51.845, backward_time=1.026, grad_norm=133.861, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.082e-05, train_time=2.713
+[gpub001:0/64] 2023-07-14 20:27:25,140 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub001:0/64] 2023-07-14 20:27:42,919 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 20:27:46,370 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 20:27:46,370 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub001:0/64] 2023-07-14 20:27:46,376 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 20:31:59,928 (trainer:732) INFO: 50epoch:train:4101-4200batch: iter_time=1.266, forward_time=0.158, loss_ctc=69.892, loss_att=54.719, acc=0.724, loss=59.271, backward_time=1.041, grad_norm=120.069, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.081e-05, train_time=7.318
+[gpub001:0/64] 2023-07-14 20:34:16,845 (trainer:732) INFO: 50epoch:train:4201-4300batch: iter_time=1.347e-04, forward_time=0.148, loss_ctc=72.245, loss_att=50.876, acc=0.714, loss=57.287, backward_time=1.033, grad_norm=144.739, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.081e-05, train_time=2.740
+[gpub001:0/64] 2023-07-14 20:36:33,354 (trainer:732) INFO: 50epoch:train:4301-4400batch: iter_time=1.372e-04, forward_time=0.145, loss_ctc=64.193, loss_att=45.992, acc=0.740, loss=51.452, backward_time=1.028, grad_norm=136.356, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.080e-05, train_time=2.730
+[gpub001:0/64] 2023-07-14 20:38:49,363 (trainer:732) INFO: 50epoch:train:4401-4500batch: iter_time=1.302e-04, forward_time=0.145, loss_ctc=64.055, loss_att=49.713, acc=0.733, loss=54.015, backward_time=1.029, grad_norm=152.603, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.080e-05, train_time=2.720
+[gpub001:0/64] 2023-07-14 20:41:05,362 (trainer:732) INFO: 50epoch:train:4501-4600batch: iter_time=1.058e-04, forward_time=0.146, loss_ctc=71.594, loss_att=49.931, acc=0.723, loss=56.430, backward_time=1.030, grad_norm=141.739, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.079e-05, train_time=2.720
+[gpub001:0/64] 2023-07-14 20:43:21,381 (trainer:732) INFO: 50epoch:train:4601-4700batch: iter_time=1.147e-04, forward_time=0.146, loss_ctc=72.948, loss_att=55.695, acc=0.725, loss=60.871, backward_time=1.029, grad_norm=108.629, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.079e-05, train_time=2.720
+[gpub001:0/64] 2023-07-14 20:45:37,140 (trainer:732) INFO: 50epoch:train:4701-4800batch: iter_time=1.130e-04, forward_time=0.145, loss_ctc=65.934, loss_att=47.948, acc=0.724, loss=53.344, backward_time=1.028, grad_norm=152.190, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.078e-05, train_time=2.715
+[gpub001:0/64] 2023-07-14 20:47:52,965 (trainer:732) INFO: 50epoch:train:4801-4900batch: iter_time=1.108e-04, forward_time=0.146, loss_ctc=68.950, loss_att=45.382, acc=0.730, loss=52.453, backward_time=1.029, grad_norm=130.881, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.078e-05, train_time=2.716
+[gpub001:0/64] 2023-07-14 20:50:08,537 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub001:0/64] 2023-07-14 20:50:26,538 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 20:50:30,045 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 20:50:30,046 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub001:0/64] 2023-07-14 20:50:30,052 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 20:54:56,835 (trainer:732) INFO: 50epoch:train:4901-5000batch: iter_time=1.261, forward_time=0.166, loss_ctc=68.113, loss_att=51.907, acc=0.712, loss=56.769, backward_time=1.030, grad_norm=123.226, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.077e-05, train_time=8.477
+[gpub001:0/64] 2023-07-14 20:57:14,888 (trainer:732) INFO: 50epoch:train:5001-5100batch: iter_time=1.200e-04, forward_time=0.146, loss_ctc=74.370, loss_att=53.834, acc=0.714, loss=59.995, backward_time=1.037, grad_norm=133.466, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.077e-05, train_time=2.761
+[gpub001:0/64] 2023-07-14 20:59:30,447 (trainer:732) INFO: 50epoch:train:5101-5200batch: iter_time=1.271e-04, forward_time=0.145, loss_ctc=63.418, loss_att=44.065, acc=0.743, loss=49.871, backward_time=1.025, grad_norm=118.916, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.076e-05, train_time=2.711
+[gpub001:0/64] 2023-07-14 21:01:46,392 (trainer:732) INFO: 50epoch:train:5201-5300batch: iter_time=1.244e-04, forward_time=0.146, loss_ctc=64.343, loss_att=50.622, acc=0.729, loss=54.738, backward_time=1.029, grad_norm=107.040, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.076e-05, train_time=2.719
+[gpub001:0/64] 2023-07-14 21:04:02,266 (trainer:732) INFO: 50epoch:train:5301-5400batch: iter_time=1.242e-04, forward_time=0.146, loss_ctc=67.130, loss_att=45.243, acc=0.738, loss=51.809, backward_time=1.027, grad_norm=126.591, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.075e-05, train_time=2.717
+[gpub001:0/64] 2023-07-14 21:06:18,342 (trainer:732) INFO: 50epoch:train:5401-5500batch: iter_time=1.258e-04, forward_time=0.147, loss_ctc=74.073, loss_att=55.688, acc=0.724, loss=61.203, backward_time=1.029, grad_norm=120.175, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.075e-05, train_time=2.721
+[gpub001:0/64] 2023-07-14 21:08:34,524 (trainer:732) INFO: 50epoch:train:5501-5600batch: iter_time=1.200e-04, forward_time=0.148, loss_ctc=64.861, loss_att=50.199, acc=0.726, loss=54.598, backward_time=1.029, grad_norm=114.083, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.074e-05, train_time=2.723
+[gpub001:0/64] 2023-07-14 21:10:57,350 (trainer:732) INFO: 50epoch:train:5601-5700batch: iter_time=1.141e-04, forward_time=0.146, loss_ctc=63.074, loss_att=45.439, acc=0.726, loss=50.730, backward_time=1.034, grad_norm=110.217, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.074e-05, train_time=2.856
+[gpub001:0/64] 2023-07-14 21:13:13,082 (trainer:732) INFO: 50epoch:train:5701-5800batch: iter_time=1.103e-04, forward_time=0.146, loss_ctc=69.031, loss_att=46.799, acc=0.723, loss=53.469, backward_time=1.027, grad_norm=143.702, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.073e-05, train_time=2.714
+[gpub001:0/64] 2023-07-14 21:14:11,324 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub001:0/64] 2023-07-14 21:14:29,273 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 21:14:32,688 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 21:14:32,688 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub001:0/64] 2023-07-14 21:14:32,749 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 21:19:03,310 (trainer:732) INFO: 50epoch:train:5801-5900batch: iter_time=2.002, forward_time=0.183, loss_ctc=73.655, loss_att=53.932, acc=0.722, loss=59.849, backward_time=1.055, grad_norm=115.715, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.073e-05, train_time=7.004
+[gpub001:0/64] 2023-07-14 21:21:32,431 (trainer:732) INFO: 50epoch:train:5901-6000batch: iter_time=1.194e-04, forward_time=0.146, loss_ctc=66.964, loss_att=50.032, acc=0.725, loss=55.112, backward_time=1.037, grad_norm=127.025, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.072e-05, train_time=2.983
+[gpub001:0/64] 2023-07-14 21:24:07,379 (trainer:732) INFO: 50epoch:train:6001-6100batch: iter_time=1.119e-04, forward_time=0.146, loss_ctc=66.762, loss_att=49.729, acc=0.735, loss=54.839, backward_time=1.080, grad_norm=115.938, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.071e-05, train_time=3.099
+[gpub001:0/64] 2023-07-14 21:26:28,272 (trainer:732) INFO: 50epoch:train:6101-6200batch: iter_time=1.172e-04, forward_time=0.146, loss_ctc=65.838, loss_att=47.261, acc=0.729, loss=52.834, backward_time=1.034, grad_norm=114.927, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.071e-05, train_time=2.818
+[gpub001:0/64] 2023-07-14 21:28:48,811 (trainer:732) INFO: 50epoch:train:6201-6300batch: iter_time=1.167e-04, forward_time=0.147, loss_ctc=68.647, loss_att=49.152, acc=0.730, loss=55.000, backward_time=1.037, grad_norm=128.845, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.070e-05, train_time=2.811
+[gpub001:0/64] 2023-07-14 21:30:05,936 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub001:0/64] 2023-07-14 21:31:12,672 (trainer:732) INFO: 50epoch:train:6301-6400batch: iter_time=1.053e-04, forward_time=0.147, loss_ctc=69.972, loss_att=51.775, acc=0.730, loss=57.234, backward_time=1.035, grad_norm=136.068, clip=100.000, loss_scale=2.484e+32, optim_step_time=0.182, optim0_lr0=5.070e-05, train_time=2.877
+[gpub001:0/64] 2023-07-14 21:33:31,183 (trainer:732) INFO: 50epoch:train:6401-6500batch: iter_time=1.154e-04, forward_time=0.146, loss_ctc=66.029, loss_att=48.270, acc=0.719, loss=53.598, backward_time=1.031, grad_norm=131.463, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.069e-05, train_time=2.770
+[gpub001:0/64] 2023-07-14 21:35:47,199 (trainer:732) INFO: 50epoch:train:6501-6600batch: iter_time=1.128e-04, forward_time=0.146, loss_ctc=68.099, loss_att=44.525, acc=0.732, loss=51.598, backward_time=1.027, grad_norm=116.086, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.069e-05, train_time=2.720
+[gpub001:0/64] 2023-07-14 21:37:20,083 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub001:0/64] 2023-07-14 21:37:38,383 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 21:37:41,825 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 21:37:41,825 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub001:0/64] 2023-07-14 21:37:41,831 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 21:41:34,057 (trainer:732) INFO: 50epoch:train:6601-6700batch: iter_time=1.243, forward_time=0.149, loss_ctc=75.228, loss_att=56.364, acc=0.705, loss=62.024, backward_time=1.043, grad_norm=131.714, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.068e-05, train_time=6.937
+[gpub001:0/64] 2023-07-14 21:43:51,597 (trainer:732) INFO: 50epoch:train:6701-6800batch: iter_time=1.228e-04, forward_time=0.146, loss_ctc=62.359, loss_att=46.440, acc=0.717, loss=51.216, backward_time=1.031, grad_norm=126.683, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.068e-05, train_time=2.751
+[gpub001:0/64] 2023-07-14 21:46:07,899 (trainer:732) INFO: 50epoch:train:6801-6900batch: iter_time=1.299e-04, forward_time=0.146, loss_ctc=65.735, loss_att=48.063, acc=0.725, loss=53.365, backward_time=1.029, grad_norm=134.386, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.067e-05, train_time=2.726
+[gpub001:0/64] 2023-07-14 21:48:23,594 (trainer:732) INFO: 50epoch:train:6901-7000batch: iter_time=1.278e-04, forward_time=0.145, loss_ctc=68.323, loss_att=49.027, acc=0.722, loss=54.815, backward_time=1.026, grad_norm=130.941, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.067e-05, train_time=2.714
+[gpub001:0/64] 2023-07-14 21:50:39,377 (trainer:732) INFO: 50epoch:train:7001-7100batch: iter_time=1.150e-04, forward_time=0.146, loss_ctc=70.496, loss_att=52.439, acc=0.716, loss=57.856, backward_time=1.029, grad_norm=129.853, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.066e-05, train_time=2.715
+[gpub001:0/64] 2023-07-14 21:52:55,578 (trainer:732) INFO: 50epoch:train:7101-7200batch: iter_time=1.273e-04, forward_time=0.147, loss_ctc=67.037, loss_att=51.153, acc=0.710, loss=55.918, backward_time=1.031, grad_norm=135.297, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.066e-05, train_time=2.724
+[gpub001:0/64] 2023-07-14 21:55:11,269 (trainer:732) INFO: 50epoch:train:7201-7300batch: iter_time=1.392e-04, forward_time=0.146, loss_ctc=64.625, loss_att=47.415, acc=0.716, loss=52.578, backward_time=1.028, grad_norm=138.346, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.065e-05, train_time=2.714
+[gpub001:0/64] 2023-07-14 21:57:27,032 (trainer:732) INFO: 50epoch:train:7301-7400batch: iter_time=1.153e-04, forward_time=0.146, loss_ctc=67.172, loss_att=45.670, acc=0.720, loss=52.120, backward_time=1.027, grad_norm=138.208, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.065e-05, train_time=2.715
+[gpub001:0/64] 2023-07-14 21:59:42,685 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub001:0/64] 2023-07-14 22:00:00,870 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 22:00:04,268 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 22:00:04,268 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpub001:0/64] 2023-07-14 22:00:04,274 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 22:04:14,311 (trainer:732) INFO: 50epoch:train:7401-7500batch: iter_time=1.298, forward_time=0.174, loss_ctc=72.785, loss_att=54.934, acc=0.706, loss=60.289, backward_time=1.034, grad_norm=127.076, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.064e-05, train_time=8.145
+[gpub001:0/64] 2023-07-14 22:06:32,396 (trainer:732) INFO: 50epoch:train:7501-7600batch: iter_time=1.281e-04, forward_time=0.145, loss_ctc=75.267, loss_att=55.281, acc=0.706, loss=61.277, backward_time=1.033, grad_norm=127.032, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.064e-05, train_time=2.761
+[gpub001:0/64] 2023-07-14 22:08:49,298 (trainer:732) INFO: 50epoch:train:7601-7700batch: iter_time=1.148e-04, forward_time=0.146, loss_ctc=61.713, loss_att=43.430, acc=0.731, loss=48.915, backward_time=1.029, grad_norm=113.236, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.063e-05, train_time=2.738
+[gpub001:0/64] 2023-07-14 22:11:05,012 (trainer:732) INFO: 50epoch:train:7701-7800batch: iter_time=1.138e-04, forward_time=0.145, loss_ctc=64.435, loss_att=51.314, acc=0.720, loss=55.251, backward_time=1.025, grad_norm=137.487, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.063e-05, train_time=2.714
+[gpub001:0/64] 2023-07-14 22:13:20,529 (trainer:732) INFO: 50epoch:train:7801-7900batch: iter_time=1.071e-04, forward_time=0.145, loss_ctc=67.719, loss_att=45.921, acc=0.729, loss=52.460, backward_time=1.026, grad_norm=116.618, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.062e-05, train_time=2.710
+[gpub001:0/64] 2023-07-14 22:15:36,651 (trainer:732) INFO: 50epoch:train:7901-8000batch: iter_time=1.160e-04, forward_time=0.146, loss_ctc=74.668, loss_att=55.808, acc=0.718, loss=61.466, backward_time=1.030, grad_norm=138.995, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.062e-05, train_time=2.722
+[gpub001:0/64] 2023-07-14 22:17:52,426 (trainer:732) INFO: 50epoch:train:8001-8100batch: iter_time=1.146e-04, forward_time=0.146, loss_ctc=64.712, loss_att=49.154, acc=0.717, loss=53.822, backward_time=1.027, grad_norm=134.088, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.061e-05, train_time=2.715
+[gpub001:0/64] 2023-07-14 22:20:08,073 (trainer:732) INFO: 50epoch:train:8101-8200batch: iter_time=1.172e-04, forward_time=0.145, loss_ctc=62.415, loss_att=45.458, acc=0.725, loss=50.545, backward_time=1.026, grad_norm=104.346, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.061e-05, train_time=2.713
+[gpub001:0/64] 2023-07-14 22:22:24,409 (trainer:732) INFO: 50epoch:train:8201-8300batch: iter_time=1.217e-04, forward_time=0.144, loss_ctc=69.803, loss_att=47.319, acc=0.716, loss=54.064, backward_time=1.024, grad_norm=120.199, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.060e-05, train_time=2.726
+[gpub001:0/64] 2023-07-14 22:23:27,929 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub001:0/64] 2023-07-14 22:23:46,234 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 22:23:49,965 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 22:23:49,965 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpub001:0/64] 2023-07-14 22:23:49,971 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 22:29:23,107 (trainer:732) INFO: 50epoch:train:8301-8400batch: iter_time=1.937, forward_time=0.171, loss_ctc=73.351, loss_att=54.696, acc=0.704, loss=60.292, backward_time=1.042, grad_norm=126.226, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=5.060e-05, train_time=8.373
+[gpub001:0/64] 2023-07-14 22:31:39,375 (trainer:732) INFO: 50epoch:train:8401-8500batch: iter_time=1.123e-04, forward_time=0.144, loss_ctc=65.360, loss_att=47.307, acc=0.724, loss=52.723, backward_time=1.028, grad_norm=106.196, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.059e-05, train_time=2.726
+[gpub001:0/64] 2023-07-14 22:33:56,198 (trainer:732) INFO: 50epoch:train:8501-8600batch: iter_time=1.121e-04, forward_time=0.145, loss_ctc=62.356, loss_att=47.082, acc=0.731, loss=51.664, backward_time=1.028, grad_norm=116.896, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.058e-05, train_time=2.736
+[gpub001:0/64] 2023-07-14 22:36:12,773 (trainer:732) INFO: 50epoch:train:8601-8700batch: iter_time=1.049e-04, forward_time=0.145, loss_ctc=69.928, loss_att=48.566, acc=0.721, loss=54.975, backward_time=1.028, grad_norm=123.106, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.058e-05, train_time=2.731
+[gpub001:0/64] 2023-07-14 22:38:28,734 (trainer:732) INFO: 50epoch:train:8701-8800batch: iter_time=1.093e-04, forward_time=0.145, loss_ctc=73.339, loss_att=55.181, acc=0.711, loss=60.628, backward_time=1.027, grad_norm=118.978, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.057e-05, train_time=2.719
+[gpub001:0/64] 2023-07-14 22:40:44,531 (trainer:732) INFO: 50epoch:train:8801-8900batch: iter_time=1.146e-04, forward_time=0.146, loss_ctc=63.033, loss_att=48.849, acc=0.707, loss=53.104, backward_time=1.028, grad_norm=132.965, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.057e-05, train_time=2.716
+[gpub001:0/64] 2023-07-14 22:42:59,903 (trainer:732) INFO: 50epoch:train:8901-9000batch: iter_time=1.334e-04, forward_time=0.145, loss_ctc=64.413, loss_att=46.307, acc=0.729, loss=51.739, backward_time=1.026, grad_norm=96.639, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.056e-05, train_time=2.707
+[gpub001:0/64] 2023-07-14 22:45:18,107 (trainer:732) INFO: 50epoch:train:9001-9100batch: iter_time=1.165e-04, forward_time=0.147, loss_ctc=69.114, loss_att=45.854, acc=0.719, loss=52.832, backward_time=1.032, grad_norm=140.806, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.056e-05, train_time=2.764
+[gpub001:0/64] 2023-07-14 22:46:49,602 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub001:0/64] 2023-07-14 22:47:07,559 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 22:47:10,970 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 22:47:10,970 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpub001:0/64] 2023-07-14 22:47:11,050 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 22:52:32,878 (trainer:732) INFO: 50epoch:train:9101-9200batch: iter_time=2.016, forward_time=0.145, loss_ctc=79.028, loss_att=57.548, acc=0.699, loss=63.992, backward_time=1.040, grad_norm=136.426, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.055e-05, train_time=8.695
+[gpub001:0/64] 2023-07-14 22:54:49,816 (trainer:732) INFO: 50epoch:train:9201-9300batch: iter_time=1.038e-04, forward_time=0.145, loss_ctc=62.727, loss_att=47.586, acc=0.737, loss=52.129, backward_time=1.032, grad_norm=123.258, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.055e-05, train_time=2.739
+[gpub001:0/64] 2023-07-14 22:57:06,559 (trainer:732) INFO: 50epoch:train:9301-9400batch: iter_time=1.113e-04, forward_time=0.145, loss_ctc=64.256, loss_att=48.937, acc=0.734, loss=53.533, backward_time=1.029, grad_norm=116.732, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.054e-05, train_time=2.735
+[gpub001:0/64] 2023-07-14 22:59:22,627 (trainer:732) INFO: 50epoch:train:9401-9500batch: iter_time=1.020e-04, forward_time=0.144, loss_ctc=68.645, loss_att=49.152, acc=0.732, loss=55.000, backward_time=1.026, grad_norm=126.326, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.054e-05, train_time=2.721
+[gpub001:0/64] 2023-07-14 23:01:39,743 (trainer:732) INFO: 50epoch:train:9501-9600batch: iter_time=1.028e-04, forward_time=0.146, loss_ctc=71.460, loss_att=50.831, acc=0.729, loss=57.020, backward_time=1.030, grad_norm=125.820, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.053e-05, train_time=2.742
+[gpub001:0/64] 2023-07-14 23:03:56,039 (trainer:732) INFO: 50epoch:train:9601-9700batch: iter_time=1.072e-04, forward_time=0.146, loss_ctc=65.840, loss_att=52.027, acc=0.718, loss=56.171, backward_time=1.029, grad_norm=137.286, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.053e-05, train_time=2.726
+[gpub001:0/64] 2023-07-14 23:06:11,688 (trainer:732) INFO: 50epoch:train:9701-9800batch: iter_time=1.074e-04, forward_time=0.145, loss_ctc=65.045, loss_att=48.007, acc=0.726, loss=53.118, backward_time=1.026, grad_norm=116.113, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.052e-05, train_time=2.713
+[gpub001:0/64] 2023-07-14 23:08:27,117 (trainer:732) INFO: 50epoch:train:9801-9900batch: iter_time=1.110e-04, forward_time=0.144, loss_ctc=69.175, loss_att=45.588, acc=0.730, loss=52.664, backward_time=1.025, grad_norm=139.307, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.052e-05, train_time=2.708
+[gpub001:0/64] 2023-07-14 23:10:42,703 (trainer:732) INFO: 50epoch:train:9901-10000batch: iter_time=1.068e-04, forward_time=0.145, loss_ctc=72.841, loss_att=53.971, acc=0.715, loss=59.632, backward_time=1.025, grad_norm=123.765, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.051e-05, train_time=2.711
+[gpub001:0/64] 2023-07-14 23:24:09,245 (trainer:338) INFO: 50epoch results: [train] iter_time=0.179, forward_time=0.148, loss_ctc=68.583, loss_att=49.901, acc=0.720, loss=55.506, backward_time=1.032, grad_norm=127.995, clip=100.000, loss_scale=3.121e+32, optim_step_time=0.182, optim0_lr0=5.077e-05, train_time=3.329, time=4 hours, 37 minutes and 40.49 seconds, total_count=470000, gpu_max_cached_mem_GB=37.635, [valid] loss_ctc=42.429, cer_ctc=0.252, loss_att=37.607, acc=0.676, cer=0.417, wer=0.998, loss=39.054, time=7 minutes and 17.28 seconds, total_count=48070, gpu_max_cached_mem_GB=37.635, [att_plot] time=5 minutes and 52.99 seconds, total_count=0, gpu_max_cached_mem_GB=37.635
+[gpub001:0/64] 2023-07-14 23:24:24,757 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub001:0/64] 2023-07-14 23:24:24,796 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.acc": exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.acc.ave_5best.till50epoch.pth
+[gpub001:0/64] 2023-07-14 23:25:13,113 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.total_count": exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.total_count.ave_5best.till50epoch.pth
+[gpub001:0/64] 2023-07-14 23:25:53,416 (trainer:272) INFO: 51/60epoch started. Estimated time to finish: 2 days, 1 hour and 33 minutes
+[gpub001:0/64] 2023-07-14 23:25:55,441 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub001:0/64] 2023-07-14 23:26:15,006 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 23:26:18,855 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 23:26:18,856 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub001:0/64] 2023-07-14 23:26:18,921 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-14 23:36:35,831 (trainer:732) INFO: 51epoch:train:1-100batch: iter_time=4.981, forward_time=0.181, loss_ctc=71.923, loss_att=55.595, acc=0.703, loss=60.494, backward_time=1.041, grad_norm=115.944, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=5.051e-05, train_time=12.824
+[gpub001:0/64] 2023-07-14 23:38:52,739 (trainer:732) INFO: 51epoch:train:101-200batch: iter_time=1.265e-04, forward_time=0.145, loss_ctc=79.235, loss_att=65.934, acc=0.698, loss=69.924, backward_time=1.031, grad_norm=121.600, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.050e-05, train_time=2.738
+[gpub001:0/64] 2023-07-14 23:41:08,953 (trainer:732) INFO: 51epoch:train:201-300batch: iter_time=1.312e-04, forward_time=0.145, loss_ctc=69.961, loss_att=49.731, acc=0.710, loss=55.800, backward_time=1.029, grad_norm=132.357, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.050e-05, train_time=2.724
+[gpub001:0/64] 2023-07-14 23:43:26,233 (trainer:732) INFO: 51epoch:train:301-400batch: iter_time=1.131e-04, forward_time=0.144, loss_ctc=64.152, loss_att=48.408, acc=0.691, loss=53.132, backward_time=1.026, grad_norm=128.225, clip=100.000, loss_scale=2.369e+32, optim_step_time=0.182, optim0_lr0=5.049e-05, train_time=2.745
+[gpub001:0/64] 2023-07-14 23:45:41,876 (trainer:732) INFO: 51epoch:train:401-500batch: iter_time=1.198e-04, forward_time=0.145, loss_ctc=72.180, loss_att=50.535, acc=0.705, loss=57.029, backward_time=1.028, grad_norm=153.483, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.049e-05, train_time=2.713
+[gpub001:0/64] 2023-07-14 23:48:00,509 (trainer:732) INFO: 51epoch:train:501-600batch: iter_time=1.228e-04, forward_time=0.146, loss_ctc=81.916, loss_att=67.146, acc=0.698, loss=71.577, backward_time=1.036, grad_norm=135.937, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.048e-05, train_time=2.772
+[gpub001:0/64] 2023-07-14 23:50:17,111 (trainer:732) INFO: 51epoch:train:601-700batch: iter_time=1.289e-04, forward_time=0.145, loss_ctc=62.275, loss_att=44.773, acc=0.726, loss=50.023, backward_time=1.029, grad_norm=104.898, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.048e-05, train_time=2.732
+[gpub001:0/64] 2023-07-14 23:52:33,852 (trainer:732) INFO: 51epoch:train:701-800batch: iter_time=1.278e-04, forward_time=0.145, loss_ctc=74.632, loss_att=56.174, acc=0.688, loss=61.711, backward_time=1.027, grad_norm=120.912, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.047e-05, train_time=2.735
+[gpub001:0/64] 2023-07-14 23:53:29,332 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpub001:0/64] 2023-07-14 23:53:47,988 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-14 23:53:51,397 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-14 23:53:51,398 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpub001:0/64] 2023-07-14 23:53:51,404 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 00:02:44,151 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub001:0/64] 2023-07-15 00:03:11,538 (trainer:732) INFO: 51epoch:train:801-900batch: iter_time=4.917, forward_time=0.176, loss_ctc=75.023, loss_att=61.940, acc=0.702, loss=65.865, backward_time=1.043, grad_norm=135.587, clip=100.000, loss_scale=2.914e+32, optim_step_time=0.183, optim0_lr0=5.047e-05, train_time=12.754
+[gpub001:0/64] 2023-07-15 00:05:28,614 (trainer:732) INFO: 51epoch:train:901-1000batch: iter_time=1.399e-04, forward_time=0.146, loss_ctc=77.022, loss_att=61.388, acc=0.701, loss=66.078, backward_time=1.031, grad_norm=116.891, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.046e-05, train_time=2.741
+[gpub001:0/64] 2023-07-15 00:07:45,430 (trainer:732) INFO: 51epoch:train:1001-1100batch: iter_time=1.294e-04, forward_time=0.145, loss_ctc=72.448, loss_att=57.061, acc=0.706, loss=61.677, backward_time=1.029, grad_norm=118.194, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.046e-05, train_time=2.736
+[gpub001:0/64] 2023-07-15 00:10:00,792 (trainer:732) INFO: 51epoch:train:1101-1200batch: iter_time=1.132e-04, forward_time=0.143, loss_ctc=63.641, loss_att=47.528, acc=0.704, loss=52.362, backward_time=1.025, grad_norm=122.929, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.045e-05, train_time=2.707
+[gpub001:0/64] 2023-07-15 00:12:16,409 (trainer:732) INFO: 51epoch:train:1201-1300batch: iter_time=1.234e-04, forward_time=0.144, loss_ctc=64.509, loss_att=47.066, acc=0.702, loss=52.299, backward_time=1.025, grad_norm=124.287, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.045e-05, train_time=2.712
+[gpub001:0/64] 2023-07-15 00:14:31,911 (trainer:732) INFO: 51epoch:train:1301-1400batch: iter_time=1.155e-04, forward_time=0.143, loss_ctc=81.052, loss_att=63.313, acc=0.697, loss=68.635, backward_time=1.025, grad_norm=161.228, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.044e-05, train_time=2.710
+[gpub001:0/64] 2023-07-15 00:16:47,763 (trainer:732) INFO: 51epoch:train:1401-1500batch: iter_time=1.052e-04, forward_time=0.144, loss_ctc=72.270, loss_att=53.340, acc=0.717, loss=59.019, backward_time=1.028, grad_norm=137.920, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.044e-05, train_time=2.717
+[gpub001:0/64] 2023-07-15 00:19:03,111 (trainer:732) INFO: 51epoch:train:1501-1600batch: iter_time=1.271e-04, forward_time=0.144, loss_ctc=70.162, loss_att=50.618, acc=0.706, loss=56.481, backward_time=1.025, grad_norm=170.597, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.043e-05, train_time=2.707
+[gpub001:0/64] 2023-07-15 00:20:39,237 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub001:0/64] 2023-07-15 00:20:57,151 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 00:21:00,575 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 00:21:00,575 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub001:0/64] 2023-07-15 00:21:00,582 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 00:24:56,757 (trainer:732) INFO: 51epoch:train:1601-1700batch: iter_time=1.325, forward_time=0.146, loss_ctc=74.706, loss_att=64.628, acc=0.698, loss=67.651, backward_time=1.039, grad_norm=142.799, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.043e-05, train_time=7.073
+[gpub001:0/64] 2023-07-15 00:27:14,907 (trainer:732) INFO: 51epoch:train:1701-1800batch: iter_time=1.209e-04, forward_time=0.146, loss_ctc=75.949, loss_att=63.359, acc=0.712, loss=67.136, backward_time=1.038, grad_norm=127.884, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.042e-05, train_time=2.763
+[gpub001:0/64] 2023-07-15 00:29:30,975 (trainer:732) INFO: 51epoch:train:1801-1900batch: iter_time=1.016e-04, forward_time=0.146, loss_ctc=69.913, loss_att=50.332, acc=0.719, loss=56.206, backward_time=1.031, grad_norm=110.939, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.041e-05, train_time=2.721
+[gpub001:0/64] 2023-07-15 00:31:46,677 (trainer:732) INFO: 51epoch:train:1901-2000batch: iter_time=1.133e-04, forward_time=0.145, loss_ctc=64.394, loss_att=48.413, acc=0.709, loss=53.208, backward_time=1.028, grad_norm=121.998, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.041e-05, train_time=2.714
+[gpub001:0/64] 2023-07-15 00:34:02,438 (trainer:732) INFO: 51epoch:train:2001-2100batch: iter_time=1.079e-04, forward_time=0.145, loss_ctc=66.797, loss_att=48.245, acc=0.712, loss=53.810, backward_time=1.029, grad_norm=124.842, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.040e-05, train_time=2.715
+[gpub001:0/64] 2023-07-15 00:36:19,042 (trainer:732) INFO: 51epoch:train:2101-2200batch: iter_time=1.069e-04, forward_time=0.146, loss_ctc=83.907, loss_att=64.979, acc=0.707, loss=70.657, backward_time=1.034, grad_norm=153.393, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.040e-05, train_time=2.732
+[gpub001:0/64] 2023-07-15 00:38:36,795 (trainer:732) INFO: 51epoch:train:2201-2300batch: iter_time=1.176e-04, forward_time=0.146, loss_ctc=64.525, loss_att=48.786, acc=0.723, loss=53.508, backward_time=1.030, grad_norm=103.031, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.039e-05, train_time=2.755
+[gpub001:0/64] 2023-07-15 00:40:52,543 (trainer:732) INFO: 51epoch:train:2301-2400batch: iter_time=1.146e-04, forward_time=0.145, loss_ctc=72.083, loss_att=51.081, acc=0.715, loss=57.381, backward_time=1.026, grad_norm=151.523, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.039e-05, train_time=2.715
+[gpub001:0/64] 2023-07-15 00:43:19,481 (trainer:732) INFO: 51epoch:train:2401-2500batch: iter_time=1.164e-04, forward_time=0.145, loss_ctc=70.723, loss_att=56.251, acc=0.711, loss=60.593, backward_time=1.041, grad_norm=144.872, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.038e-05, train_time=2.939
+[gpub001:0/64] 2023-07-15 00:43:21,090 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub001:0/64] 2023-07-15 00:43:39,291 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 00:43:42,732 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 00:43:42,733 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub001:0/64] 2023-07-15 00:43:42,739 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 00:49:10,741 (trainer:732) INFO: 51epoch:train:2501-2600batch: iter_time=1.293, forward_time=0.188, loss_ctc=72.326, loss_att=55.154, acc=0.706, loss=60.305, backward_time=1.054, grad_norm=150.639, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.038e-05, train_time=7.025
+[gpub001:0/64] 2023-07-15 00:51:36,074 (trainer:732) INFO: 51epoch:train:2601-2700batch: iter_time=1.215e-04, forward_time=0.149, loss_ctc=78.155, loss_att=65.045, acc=0.701, loss=68.978, backward_time=1.038, grad_norm=133.517, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.037e-05, train_time=2.907
+[gpub001:0/64] 2023-07-15 00:54:08,484 (trainer:732) INFO: 51epoch:train:2701-2800batch: iter_time=1.115e-04, forward_time=0.146, loss_ctc=68.157, loss_att=49.050, acc=0.717, loss=54.782, backward_time=1.051, grad_norm=120.581, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.037e-05, train_time=3.048
+[gpub001:0/64] 2023-07-15 00:56:50,168 (trainer:732) INFO: 51epoch:train:2801-2900batch: iter_time=1.083e-04, forward_time=0.145, loss_ctc=61.833, loss_att=46.996, acc=0.700, loss=51.447, backward_time=1.048, grad_norm=110.089, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.036e-05, train_time=3.233
+[gpub001:0/64] 2023-07-15 00:59:24,641 (trainer:732) INFO: 51epoch:train:2901-3000batch: iter_time=1.133e-04, forward_time=0.145, loss_ctc=74.094, loss_att=49.328, acc=0.708, loss=56.758, backward_time=1.046, grad_norm=137.040, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.036e-05, train_time=3.089
+[gpub001:0/64] 2023-07-15 01:01:53,489 (trainer:732) INFO: 51epoch:train:3001-3100batch: iter_time=1.135e-04, forward_time=0.145, loss_ctc=81.243, loss_att=67.309, acc=0.698, loss=71.489, backward_time=1.042, grad_norm=131.285, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.035e-05, train_time=2.977
+[gpub001:0/64] 2023-07-15 01:04:27,494 (trainer:732) INFO: 51epoch:train:3101-3200batch: iter_time=1.194e-04, forward_time=0.145, loss_ctc=61.668, loss_att=44.652, acc=0.728, loss=49.757, backward_time=1.047, grad_norm=103.749, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.035e-05, train_time=3.080
+[gpub001:0/64] 2023-07-15 01:06:55,343 (trainer:732) INFO: 51epoch:train:3201-3300batch: iter_time=1.166e-04, forward_time=0.145, loss_ctc=72.274, loss_att=53.996, acc=0.696, loss=59.479, backward_time=1.037, grad_norm=125.051, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.034e-05, train_time=2.957
+[gpub001:0/64] 2023-07-15 01:08:01,140 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub001:0/64] 2023-07-15 01:08:19,465 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 01:08:22,895 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 01:08:22,895 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub001:0/64] 2023-07-15 01:08:22,902 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 01:13:05,704 (trainer:732) INFO: 51epoch:train:3301-3400batch: iter_time=2.009, forward_time=0.148, loss_ctc=75.223, loss_att=63.691, acc=0.710, loss=67.150, backward_time=1.051, grad_norm=142.783, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.034e-05, train_time=7.407
+[gpub001:0/64] 2023-07-15 01:15:22,767 (trainer:732) INFO: 51epoch:train:3401-3500batch: iter_time=1.216e-04, forward_time=0.147, loss_ctc=74.723, loss_att=61.360, acc=0.699, loss=65.369, backward_time=1.033, grad_norm=144.345, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.033e-05, train_time=2.741
+[gpub001:0/64] 2023-07-15 01:17:38,745 (trainer:732) INFO: 51epoch:train:3501-3600batch: iter_time=1.134e-04, forward_time=0.146, loss_ctc=65.675, loss_att=49.439, acc=0.718, loss=54.310, backward_time=1.028, grad_norm=123.402, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.033e-05, train_time=2.719
+[gpub001:0/64] 2023-07-15 01:19:54,187 (trainer:732) INFO: 51epoch:train:3601-3700batch: iter_time=1.316e-04, forward_time=0.145, loss_ctc=60.673, loss_att=46.051, acc=0.693, loss=50.437, backward_time=1.026, grad_norm=100.622, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.032e-05, train_time=2.709
+[gpub001:0/64] 2023-07-15 01:22:10,043 (trainer:732) INFO: 51epoch:train:3701-3800batch: iter_time=1.174e-04, forward_time=0.146, loss_ctc=77.507, loss_att=54.217, acc=0.713, loss=61.204, backward_time=1.029, grad_norm=155.600, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.032e-05, train_time=2.717
+[gpub001:0/64] 2023-07-15 01:24:26,099 (trainer:732) INFO: 51epoch:train:3801-3900batch: iter_time=1.260e-04, forward_time=0.147, loss_ctc=77.820, loss_att=62.731, acc=0.702, loss=67.257, backward_time=1.031, grad_norm=134.212, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.031e-05, train_time=2.721
+[gpub001:0/64] 2023-07-15 01:26:42,048 (trainer:732) INFO: 51epoch:train:3901-4000batch: iter_time=1.229e-04, forward_time=0.147, loss_ctc=64.291, loss_att=46.914, acc=0.712, loss=52.127, backward_time=1.028, grad_norm=125.741, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.031e-05, train_time=2.719
+[gpub001:0/64] 2023-07-15 01:28:57,962 (trainer:732) INFO: 51epoch:train:4001-4100batch: iter_time=1.251e-04, forward_time=0.147, loss_ctc=70.662, loss_att=55.242, acc=0.705, loss=59.868, backward_time=1.027, grad_norm=125.153, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.030e-05, train_time=2.718
+[gpub001:0/64] 2023-07-15 01:30:40,640 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub001:0/64] 2023-07-15 01:30:58,549 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 01:31:02,016 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 01:31:02,017 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpub001:0/64] 2023-07-15 01:31:02,023 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 01:35:33,364 (trainer:732) INFO: 51epoch:train:4101-4200batch: iter_time=1.388, forward_time=0.180, loss_ctc=68.988, loss_att=53.972, acc=0.717, loss=58.477, backward_time=1.042, grad_norm=120.989, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=5.030e-05, train_time=7.908
+[gpub001:0/64] 2023-07-15 01:37:51,985 (trainer:732) INFO: 51epoch:train:4201-4300batch: iter_time=1.179e-04, forward_time=0.147, loss_ctc=74.833, loss_att=60.854, acc=0.715, loss=65.048, backward_time=1.037, grad_norm=133.761, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.029e-05, train_time=2.772
+[gpub001:0/64] 2023-07-15 01:40:08,097 (trainer:732) INFO: 51epoch:train:4301-4400batch: iter_time=1.075e-04, forward_time=0.146, loss_ctc=69.438, loss_att=52.734, acc=0.721, loss=57.745, backward_time=1.029, grad_norm=123.930, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.029e-05, train_time=2.722
+[gpub001:0/64] 2023-07-15 01:42:25,511 (trainer:732) INFO: 51epoch:train:4401-4500batch: iter_time=1.121e-04, forward_time=0.155, loss_ctc=63.839, loss_att=48.472, acc=0.713, loss=53.082, backward_time=1.032, grad_norm=131.811, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.028e-05, train_time=2.748
+[gpub001:0/64] 2023-07-15 01:44:40,951 (trainer:732) INFO: 51epoch:train:4501-4600batch: iter_time=1.110e-04, forward_time=0.144, loss_ctc=66.389, loss_att=47.906, acc=0.715, loss=53.451, backward_time=1.026, grad_norm=153.232, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.028e-05, train_time=2.709
+[gpub001:0/64] 2023-07-15 01:46:57,089 (trainer:732) INFO: 51epoch:train:4601-4700batch: iter_time=1.045e-04, forward_time=0.145, loss_ctc=83.957, loss_att=64.618, acc=0.707, loss=70.420, backward_time=1.030, grad_norm=166.949, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.027e-05, train_time=2.723
+[gpub001:0/64] 2023-07-15 01:49:12,741 (trainer:732) INFO: 51epoch:train:4701-4800batch: iter_time=9.881e-05, forward_time=0.145, loss_ctc=64.633, loss_att=48.052, acc=0.725, loss=53.027, backward_time=1.029, grad_norm=122.350, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.027e-05, train_time=2.713
+[gpub001:0/64] 2023-07-15 01:51:28,305 (trainer:732) INFO: 51epoch:train:4801-4900batch: iter_time=1.049e-04, forward_time=0.145, loss_ctc=69.030, loss_att=50.159, acc=0.722, loss=55.820, backward_time=1.027, grad_norm=117.971, clip=100.000, loss_scale=1.947e+32, optim_step_time=0.182, optim0_lr0=5.026e-05, train_time=2.711
+[gpub001:0/64] 2023-07-15 01:53:45,279 (trainer:732) INFO: 51epoch:train:4901-5000batch: iter_time=1.268e-04, forward_time=0.153, loss_ctc=70.500, loss_att=57.120, acc=0.709, loss=61.134, backward_time=1.030, grad_norm=119.144, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.026e-05, train_time=2.739
+[gpub001:0/64] 2023-07-15 01:53:49,960 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub001:0/64] 2023-07-15 01:54:07,620 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 01:54:11,021 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 01:54:11,021 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub001:0/64] 2023-07-15 01:54:11,028 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 02:00:36,494 (trainer:732) INFO: 51epoch:train:5001-5100batch: iter_time=1.321, forward_time=0.197, loss_ctc=71.796, loss_att=54.624, acc=0.720, loss=59.775, backward_time=1.042, grad_norm=142.285, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.025e-05, train_time=8.225
+[gpub001:0/64] 2023-07-15 02:02:53,486 (trainer:732) INFO: 51epoch:train:5101-5200batch: iter_time=1.192e-04, forward_time=0.148, loss_ctc=77.425, loss_att=64.117, acc=0.711, loss=68.109, backward_time=1.031, grad_norm=121.077, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.025e-05, train_time=2.740
+[gpub001:0/64] 2023-07-15 02:05:14,153 (trainer:732) INFO: 51epoch:train:5201-5300batch: iter_time=2.312e-04, forward_time=0.185, loss_ctc=68.549, loss_att=49.844, acc=0.725, loss=55.455, backward_time=1.034, grad_norm=126.463, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.024e-05, train_time=2.813
+[gpub001:0/64] 2023-07-15 02:07:30,780 (trainer:732) INFO: 51epoch:train:5301-5400batch: iter_time=1.146e-04, forward_time=0.146, loss_ctc=61.261, loss_att=47.446, acc=0.706, loss=51.590, backward_time=1.030, grad_norm=122.414, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.024e-05, train_time=2.732
+[gpub001:0/64] 2023-07-15 02:09:46,510 (trainer:732) INFO: 51epoch:train:5401-5500batch: iter_time=1.230e-04, forward_time=0.145, loss_ctc=71.938, loss_att=49.005, acc=0.717, loss=55.885, backward_time=1.027, grad_norm=145.363, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.023e-05, train_time=2.714
+[gpub001:0/64] 2023-07-15 02:12:02,889 (trainer:732) INFO: 51epoch:train:5501-5600batch: iter_time=1.216e-04, forward_time=0.146, loss_ctc=80.684, loss_att=65.569, acc=0.713, loss=70.103, backward_time=1.031, grad_norm=154.669, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.023e-05, train_time=2.727
+[gpub001:0/64] 2023-07-15 02:14:18,639 (trainer:732) INFO: 51epoch:train:5601-5700batch: iter_time=1.232e-04, forward_time=0.145, loss_ctc=60.971, loss_att=43.444, acc=0.734, loss=48.702, backward_time=1.028, grad_norm=110.491, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.022e-05, train_time=2.715
+[gpub001:0/64] 2023-07-15 02:16:42,195 (trainer:732) INFO: 51epoch:train:5701-5800batch: iter_time=1.226e-04, forward_time=0.206, loss_ctc=71.178, loss_att=52.973, acc=0.713, loss=58.434, backward_time=1.033, grad_norm=126.966, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.186, optim0_lr0=5.022e-05, train_time=2.870
+[gpub001:0/64] 2023-07-15 02:17:48,783 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub001:0/64] 2023-07-15 02:18:06,852 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 02:18:10,346 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 02:18:10,346 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub001:0/64] 2023-07-15 02:18:10,352 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 02:24:23,285 (trainer:732) INFO: 51epoch:train:5801-5900batch: iter_time=3.167, forward_time=0.193, loss_ctc=73.954, loss_att=58.719, acc=0.720, loss=63.289, backward_time=1.046, grad_norm=120.270, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.021e-05, train_time=9.221
+[gpub001:0/64] 2023-07-15 02:26:42,799 (trainer:732) INFO: 51epoch:train:5901-6000batch: iter_time=1.365e-04, forward_time=0.148, loss_ctc=77.878, loss_att=59.660, acc=0.724, loss=65.125, backward_time=1.031, grad_norm=137.273, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.021e-05, train_time=2.791
+[gpub001:0/64] 2023-07-15 02:29:01,054 (trainer:732) INFO: 51epoch:train:6001-6100batch: iter_time=1.376e-04, forward_time=0.147, loss_ctc=71.477, loss_att=57.056, acc=0.717, loss=61.382, backward_time=1.032, grad_norm=132.136, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.020e-05, train_time=2.765
+[gpub001:0/64] 2023-07-15 02:42:50,762 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 02:42:54,186 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 02:42:54,186 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub001:0/64] 2023-07-15 02:42:54,192 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 02:47:36,979 (trainer:732) INFO: 51epoch:train:6601-6700batch: iter_time=1.585, forward_time=0.162, loss_ctc=70.032, loss_att=58.956, acc=0.708, loss=62.279, backward_time=1.037, grad_norm=139.517, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.017e-05, train_time=8.260 +[gpub001:0/64] 2023-07-15 02:49:54,229 (trainer:732) INFO: 51epoch:train:6701-6800batch: iter_time=1.289e-04, forward_time=0.147, loss_ctc=75.545, loss_att=58.495, acc=0.711, loss=63.610, backward_time=1.033, grad_norm=141.681, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.017e-05, train_time=2.745 +[gpub001:0/64] 2023-07-15 02:52:10,761 (trainer:732) INFO: 51epoch:train:6801-6900batch: iter_time=1.144e-04, forward_time=0.145, loss_ctc=74.367, loss_att=61.655, acc=0.708, loss=65.468, backward_time=1.029, grad_norm=128.279, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.016e-05, train_time=2.730 +[gpub001:0/64] 2023-07-15 02:54:26,812 (trainer:732) INFO: 51epoch:train:6901-7000batch: iter_time=1.238e-04, forward_time=0.145, loss_ctc=65.571, loss_att=49.989, acc=0.708, loss=54.664, backward_time=1.028, grad_norm=112.823, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.016e-05, train_time=2.721 +[gpub001:0/64] 2023-07-15 02:56:42,153 (trainer:732) INFO: 51epoch:train:7001-7100batch: iter_time=1.037e-04, forward_time=0.144, loss_ctc=56.492, loss_att=43.028, acc=0.701, loss=47.067, backward_time=1.025, grad_norm=116.572, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.015e-05, train_time=2.707 +[gpub001:0/64] 2023-07-15 02:58:58,134 (trainer:732) INFO: 51epoch:train:7101-7200batch: iter_time=1.173e-04, forward_time=0.145, loss_ctc=84.088, loss_att=59.885, acc=0.716, loss=67.146, backward_time=1.030, grad_norm=159.957, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.015e-05, train_time=2.719 +[gpub001:0/64] 2023-07-15 03:01:13,992 (trainer:732) INFO: 51epoch:train:7201-7300batch: iter_time=1.333e-04, forward_time=0.145, loss_ctc=72.488, loss_att=55.906, acc=0.708, loss=60.881, backward_time=1.029, grad_norm=126.000, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.014e-05, train_time=2.717 +[gpub001:0/64] 2023-07-15 03:03:29,592 (trainer:732) INFO: 51epoch:train:7301-7400batch: iter_time=1.097e-04, forward_time=0.144, loss_ctc=67.910, loss_att=51.640, acc=0.710, loss=56.521, backward_time=1.028, grad_norm=112.939, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, 
optim0_lr0=5.014e-05, train_time=2.712 +[gpub001:0/64] 2023-07-15 03:05:45,812 (trainer:732) INFO: 51epoch:train:7401-7500batch: iter_time=9.587e-05, forward_time=0.146, loss_ctc=69.895, loss_att=58.438, acc=0.703, loss=61.875, backward_time=1.030, grad_norm=112.497, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.013e-05, train_time=2.724 +[gpub001:0/64] 2023-07-15 03:06:00,836 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpub001:0/64] 2023-07-15 03:06:19,136 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 03:06:22,592 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 03:06:22,593 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub001:0/64] 2023-07-15 03:06:22,599 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 03:12:16,439 (trainer:732) INFO: 51epoch:train:7501-7600batch: iter_time=2.393, forward_time=0.145, loss_ctc=71.969, loss_att=54.560, acc=0.721, loss=59.782, backward_time=1.046, grad_norm=133.083, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.013e-05, train_time=7.812 +[gpub001:0/64] 2023-07-15 03:14:33,471 (trainer:732) INFO: 51epoch:train:7601-7700batch: iter_time=1.093e-04, forward_time=0.147, loss_ctc=76.969, loss_att=62.660, acc=0.719, loss=66.953, backward_time=1.033, grad_norm=136.541, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.012e-05, train_time=2.740 +[gpub001:0/64] 2023-07-15 03:16:50,003 (trainer:732) INFO: 51epoch:train:7701-7800batch: iter_time=1.146e-04, forward_time=0.146, loss_ctc=67.626, loss_att=48.666, acc=0.727, loss=54.354, backward_time=1.031, grad_norm=124.562, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.012e-05, train_time=2.730 +[gpub001:0/64] 2023-07-15 03:19:06,142 (trainer:732) INFO: 51epoch:train:7801-7900batch: iter_time=1.138e-04, forward_time=0.146, loss_ctc=61.458, loss_att=47.781, acc=0.708, loss=51.884, backward_time=1.029, grad_norm=132.908, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.011e-05, train_time=2.723 +[gpub001:0/64] 2023-07-15 03:21:21,748 (trainer:732) INFO: 51epoch:train:7901-8000batch: iter_time=1.118e-04, forward_time=0.144, loss_ctc=73.377, loss_att=49.099, acc=0.720, loss=56.382, backward_time=1.026, grad_norm=145.668, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.011e-05, train_time=2.712 +[gpub001:0/64] 2023-07-15 03:23:41,893 (trainer:732) INFO: 51epoch:train:8001-8100batch: iter_time=1.181e-04, forward_time=0.146, loss_ctc=80.253, loss_att=65.949, acc=0.710, loss=70.240, backward_time=1.043, grad_norm=118.794, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.010e-05, train_time=2.803 +[gpub001:0/64] 2023-07-15 03:25:57,929 (trainer:732) INFO: 51epoch:train:8101-8200batch: iter_time=1.161e-04, forward_time=0.146, loss_ctc=59.833, 
loss_att=43.945, acc=0.737, loss=48.712, backward_time=1.028, grad_norm=97.269, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.010e-05, train_time=2.720 +[gpub001:0/64] 2023-07-15 03:28:22,509 (trainer:732) INFO: 51epoch:train:8201-8300batch: iter_time=4.203e-04, forward_time=0.206, loss_ctc=70.476, loss_att=52.382, acc=0.712, loss=57.810, backward_time=1.036, grad_norm=124.276, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.009e-05, train_time=2.891 +[gpub001:0/64] 2023-07-15 03:29:12,660 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub001:0/64] 2023-07-15 03:29:30,661 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 03:29:34,357 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 03:29:34,357 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub001:0/64] 2023-07-15 03:29:34,364 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 03:34:24,246 (trainer:732) INFO: 51epoch:train:8301-8400batch: iter_time=1.383, forward_time=0.183, loss_ctc=74.757, loss_att=62.043, acc=0.719, loss=65.857, backward_time=1.043, grad_norm=136.488, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.009e-05, train_time=7.234 +[gpub001:0/64] 2023-07-15 03:36:41,508 (trainer:732) INFO: 51epoch:train:8401-8500batch: iter_time=9.487e-05, forward_time=0.147, loss_ctc=73.152, loss_att=60.272, acc=0.703, loss=64.136, backward_time=1.032, grad_norm=130.904, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.008e-05, train_time=2.745 +[gpub001:0/64] 2023-07-15 03:38:58,154 (trainer:732) INFO: 51epoch:train:8501-8600batch: iter_time=1.035e-04, forward_time=0.145, loss_ctc=66.048, loss_att=49.450, acc=0.717, loss=54.429, backward_time=1.029, grad_norm=115.954, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.008e-05, train_time=2.733 +[gpub001:0/64] 2023-07-15 03:41:13,765 (trainer:732) INFO: 51epoch:train:8601-8700batch: iter_time=9.860e-05, forward_time=0.143, loss_ctc=60.580, loss_att=45.944, acc=0.699, loss=50.334, backward_time=1.024, grad_norm=132.617, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.007e-05, train_time=2.712 +[gpub001:0/64] 2023-07-15 03:43:30,063 (trainer:732) INFO: 51epoch:train:8701-8800batch: iter_time=1.299e-04, forward_time=0.147, loss_ctc=75.928, loss_att=53.765, acc=0.716, loss=60.414, backward_time=1.030, grad_norm=155.068, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.006e-05, train_time=2.726 +[gpub001:0/64] 2023-07-15 03:45:21,567 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. 
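# --- Aside: "The grad norm is nan. Skipping updating the model." -------------
# Under mixed precision the trainer skips the optimizer step when gradients
# are non-finite and lets the dynamic loss scale back off; that is why
# loss_scale later in this log halves from 3.245e+32 to 1.623e+32 after nan
# events, having doubled earlier (3.245e+32 -> 6.490e+32). Intermediate values
# such as 3.829e+32 are averages over a 100-batch reporting window. A minimal
# sketch of such a policy, assuming GradScaler-style constants (backoff 0.5,
# growth 2.0); the class below is illustrative, not ESPnet code:

import math

class DynamicLossScale:
    def __init__(self, scale: float, growth_interval: int = 2000) -> None:
        self.scale = scale
        self.growth_interval = growth_interval
        self._good_steps = 0

    def step_allowed(self, grad_norm: float) -> bool:
        """Return True if the optimizer step should be applied."""
        if not math.isfinite(grad_norm):   # overflow: skip update, halve scale
            self.scale *= 0.5
            self._good_steps = 0
            return False
        self._good_steps += 1
        if self._good_steps >= self.growth_interval:  # clean run: double scale
            self.scale *= 2.0
            self._good_steps = 0
        return True
# ------------------------------------------------------------------------------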
+[gpub001:0/64] 2023-07-15 03:45:45,981 (trainer:732) INFO: 51epoch:train:8801-8900batch: iter_time=1.266e-04, forward_time=0.147, loss_ctc=77.868, loss_att=61.551, acc=0.707, loss=66.446, backward_time=1.029, grad_norm=134.524, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.006e-05, train_time=2.718 +[gpub001:0/64] 2023-07-15 03:48:03,191 (trainer:732) INFO: 51epoch:train:8901-9000batch: iter_time=1.297e-04, forward_time=0.146, loss_ctc=64.524, loss_att=46.573, acc=0.715, loss=51.958, backward_time=1.029, grad_norm=138.340, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.006e-05, train_time=2.744 +[gpub001:0/64] 2023-07-15 03:50:20,560 (trainer:732) INFO: 51epoch:train:9001-9100batch: iter_time=1.038e-04, forward_time=0.146, loss_ctc=70.040, loss_att=54.802, acc=0.706, loss=59.373, backward_time=1.032, grad_norm=151.464, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.005e-05, train_time=2.747 +[gpub001:0/64] 2023-07-15 03:51:53,225 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub001:0/64] 2023-07-15 03:52:11,289 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 03:52:14,696 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 03:52:14,696 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub001:0/64] 2023-07-15 03:52:14,703 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 03:57:27,451 (trainer:732) INFO: 51epoch:train:9101-9200batch: iter_time=1.306, forward_time=0.144, loss_ctc=65.711, loss_att=50.921, acc=0.714, loss=55.358, backward_time=1.041, grad_norm=118.734, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.005e-05, train_time=8.538 +[gpub001:0/64] 2023-07-15 03:59:49,919 (trainer:732) INFO: 51epoch:train:9201-9300batch: iter_time=1.085e-04, forward_time=0.145, loss_ctc=75.976, loss_att=57.729, acc=0.724, loss=63.203, backward_time=1.042, grad_norm=148.716, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.004e-05, train_time=2.849 +[gpub001:0/64] 2023-07-15 04:02:07,659 (trainer:732) INFO: 51epoch:train:9301-9400batch: iter_time=1.137e-04, forward_time=0.148, loss_ctc=74.558, loss_att=59.518, acc=0.719, loss=64.030, backward_time=1.034, grad_norm=124.685, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.003e-05, train_time=2.755 +[gpub001:0/64] 2023-07-15 04:04:27,052 (trainer:732) INFO: 51epoch:train:9401-9500batch: iter_time=1.244e-04, forward_time=0.146, loss_ctc=64.717, loss_att=49.875, acc=0.718, loss=54.328, backward_time=1.038, grad_norm=132.071, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.003e-05, train_time=2.788 +[gpub001:0/64] 2023-07-15 04:06:45,446 (trainer:732) INFO: 51epoch:train:9501-9600batch: iter_time=9.616e-05, forward_time=0.145, loss_ctc=55.849, loss_att=42.233, acc=0.712, loss=46.317, 
backward_time=1.030, grad_norm=119.204, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.002e-05, train_time=2.768 +[gpub001:0/64] 2023-07-15 04:09:01,853 (trainer:732) INFO: 51epoch:train:9601-9700batch: iter_time=1.058e-04, forward_time=0.147, loss_ctc=82.299, loss_att=58.433, acc=0.727, loss=65.593, backward_time=1.032, grad_norm=158.815, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.002e-05, train_time=2.728 +[gpub001:0/64] 2023-07-15 04:11:21,471 (trainer:732) INFO: 51epoch:train:9701-9800batch: iter_time=1.045e-04, forward_time=0.146, loss_ctc=73.290, loss_att=56.427, acc=0.712, loss=61.486, backward_time=1.036, grad_norm=130.108, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.001e-05, train_time=2.792 +[gpub001:0/64] 2023-07-15 04:13:37,214 (trainer:732) INFO: 51epoch:train:9801-9900batch: iter_time=1.051e-04, forward_time=0.145, loss_ctc=67.601, loss_att=50.730, acc=0.720, loss=55.791, backward_time=1.028, grad_norm=117.358, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.001e-05, train_time=2.715 +[gpub001:0/64] 2023-07-15 04:15:53,312 (trainer:732) INFO: 51epoch:train:9901-10000batch: iter_time=9.479e-05, forward_time=0.147, loss_ctc=68.884, loss_att=56.769, acc=0.722, loss=60.403, backward_time=1.029, grad_norm=111.830, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.000e-05, train_time=2.722 +[gpub001:0/64] 2023-07-15 04:28:17,825 (trainer:338) INFO: 51epoch results: [train] iter_time=0.271, forward_time=0.150, loss_ctc=70.867, loss_att=54.317, acc=0.712, loss=59.282, backward_time=1.033, grad_norm=130.980, clip=100.000, loss_scale=2.538e+32, optim_step_time=0.182, optim0_lr0=5.025e-05, train_time=3.480, time=4 hours, 50 minutes and 9.54 seconds, total_count=480000, gpu_max_cached_mem_GB=37.635, [valid] loss_ctc=42.872, cer_ctc=0.251, loss_att=38.738, acc=0.678, cer=0.404, wer=0.996, loss=39.978, time=6 minutes and 25.08 seconds, total_count=49082, gpu_max_cached_mem_GB=37.635, [att_plot] time=5 minutes and 49.35 seconds, total_count=0, gpu_max_cached_mem_GB=37.635 +[gpub001:0/64] 2023-07-15 04:28:33,383 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpub001:0/64] 2023-07-15 04:28:33,396 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/46epoch.pth +[gpub001:0/64] 2023-07-15 04:28:33,396 (trainer:272) INFO: 52/60epoch started. Estimated time to finish: 1 day, 20 hours and 51 minutes +[gpub001:0/64] 2023-07-15 04:28:33,399 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
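# --- Aside: where "Estimated time to finish" comes from ----------------------
# Epoch 51 cost ~4 h 50 m of training plus ~6.4 m validation and ~5.8 m of
# attention plotting, and 9 of 60 epochs remain (52..60), so a per-epoch
# extrapolation lands close to the logged "1 day, 20 hours and 51 minutes";
# the trainer presumably averages over more than just the last epoch, hence
# the small gap. Back-of-the-envelope check:

per_epoch_s = (4 * 3600 + 50 * 60 + 9.54) + (6 * 60 + 25.08) + (5 * 60 + 49.35)
eta_h = (60 - 51) * per_epoch_s / 3600
print(f"{eta_h:.1f} h")  # -> 45.4 h ~= 1 day 21 h, vs. logged 1 day 20 h 51 m
# ------------------------------------------------------------------------------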
+[gpub001:0/64] 2023-07-15 04:28:51,055 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 04:28:54,528 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 04:28:54,528 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub001:0/64] 2023-07-15 04:28:54,534 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 04:35:34,681 (trainer:732) INFO: 52epoch:train:1-100batch: iter_time=2.800, forward_time=0.165, loss_ctc=69.950, loss_att=52.193, acc=0.709, loss=57.520, backward_time=1.039, grad_norm=115.030, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.000e-05, train_time=8.425 +[gpub001:0/64] 2023-07-15 04:37:51,420 (trainer:732) INFO: 52epoch:train:101-200batch: iter_time=1.270e-04, forward_time=0.146, loss_ctc=69.058, loss_att=52.313, acc=0.715, loss=57.337, backward_time=1.032, grad_norm=143.837, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.999e-05, train_time=2.735 +[gpub001:0/64] 2023-07-15 04:40:09,108 (trainer:732) INFO: 52epoch:train:201-300batch: iter_time=1.145e-04, forward_time=0.144, loss_ctc=69.512, loss_att=51.083, acc=0.718, loss=56.611, backward_time=1.032, grad_norm=136.816, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.999e-05, train_time=2.754 +[gpub001:0/64] 2023-07-15 04:42:29,211 (trainer:732) INFO: 52epoch:train:301-400batch: iter_time=1.408e-04, forward_time=0.146, loss_ctc=77.821, loss_att=61.168, acc=0.703, loss=66.164, backward_time=1.035, grad_norm=151.115, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.998e-05, train_time=2.802 +[gpub001:0/64] 2023-07-15 04:44:49,281 (trainer:732) INFO: 52epoch:train:401-500batch: iter_time=1.053e-04, forward_time=0.144, loss_ctc=59.969, loss_att=45.427, acc=0.721, loss=49.790, backward_time=1.032, grad_norm=126.820, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.998e-05, train_time=2.801 +[gpub001:0/64] 2023-07-15 04:47:08,640 (trainer:732) INFO: 52epoch:train:501-600batch: iter_time=1.146e-04, forward_time=0.145, loss_ctc=61.323, loss_att=48.328, acc=0.726, loss=52.227, backward_time=1.037, grad_norm=125.310, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.997e-05, train_time=2.787 +[gpub001:0/64] 2023-07-15 04:49:31,134 (trainer:732) INFO: 52epoch:train:601-700batch: iter_time=1.142e-04, forward_time=0.146, loss_ctc=75.112, loss_att=63.431, acc=0.711, loss=66.935, backward_time=1.054, grad_norm=142.319, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.997e-05, train_time=2.850 +[gpub001:0/64] 2023-07-15 04:51:49,592 (trainer:732) INFO: 52epoch:train:701-800batch: iter_time=1.157e-04, forward_time=0.146, loss_ctc=75.882, loss_att=61.266, acc=0.716, loss=65.651, backward_time=1.033, grad_norm=112.293, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.996e-05, 
train_time=2.769 +[gpub001:0/64] 2023-07-15 04:52:41,328 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub001:0/64] 2023-07-15 04:52:59,420 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 04:53:02,825 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 04:53:02,825 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub001:0/64] 2023-07-15 04:53:02,831 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 04:57:23,419 (trainer:732) INFO: 52epoch:train:801-900batch: iter_time=1.338, forward_time=0.237, loss_ctc=68.461, loss_att=51.350, acc=0.709, loss=56.483, backward_time=1.050, grad_norm=146.068, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.186, optim0_lr0=4.996e-05, train_time=6.676 +[gpub001:0/64] 2023-07-15 04:59:41,599 (trainer:732) INFO: 52epoch:train:901-1000batch: iter_time=1.176e-04, forward_time=0.148, loss_ctc=69.156, loss_att=58.040, acc=0.702, loss=61.375, backward_time=1.029, grad_norm=139.715, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.996e-05, train_time=2.764 +[gpub001:0/64] 2023-07-15 05:01:57,285 (trainer:732) INFO: 52epoch:train:1001-1100batch: iter_time=1.241e-04, forward_time=0.146, loss_ctc=71.143, loss_att=50.487, acc=0.721, loss=56.684, backward_time=1.029, grad_norm=117.836, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.995e-05, train_time=2.713 +[gpub001:0/64] 2023-07-15 05:04:13,647 (trainer:732) INFO: 52epoch:train:1101-1200batch: iter_time=1.046e-04, forward_time=0.147, loss_ctc=74.174, loss_att=57.692, acc=0.712, loss=62.637, backward_time=1.030, grad_norm=138.130, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.995e-05, train_time=2.727 +[gpub001:0/64] 2023-07-15 05:06:29,733 (trainer:732) INFO: 52epoch:train:1201-1300batch: iter_time=1.114e-04, forward_time=0.147, loss_ctc=57.362, loss_att=42.969, acc=0.723, loss=47.287, backward_time=1.029, grad_norm=118.350, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.994e-05, train_time=2.721 +[gpub001:0/64] 2023-07-15 05:08:45,836 (trainer:732) INFO: 52epoch:train:1301-1400batch: iter_time=1.205e-04, forward_time=0.149, loss_ctc=65.213, loss_att=52.131, acc=0.732, loss=56.055, backward_time=1.030, grad_norm=140.121, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.994e-05, train_time=2.722 +[gpub001:0/64] 2023-07-15 05:11:02,183 (trainer:732) INFO: 52epoch:train:1401-1500batch: iter_time=1.198e-04, forward_time=0.149, loss_ctc=76.107, loss_att=62.021, acc=0.715, loss=66.247, backward_time=1.032, grad_norm=127.656, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.993e-05, train_time=2.727 +[gpub001:0/64] 2023-07-15 05:13:18,216 (trainer:732) INFO: 52epoch:train:1501-1600batch: iter_time=1.083e-04, forward_time=0.148, loss_ctc=66.997, loss_att=54.395, acc=0.727, loss=58.175, 
backward_time=1.030, grad_norm=117.246, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.993e-05, train_time=2.720 +[gpub001:0/64] 2023-07-15 05:14:59,026 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub001:0/64] 2023-07-15 05:15:17,049 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 05:15:20,571 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 05:15:20,571 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub001:0/64] 2023-07-15 05:15:20,578 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 05:22:59,109 (trainer:732) INFO: 52epoch:train:1601-1700batch: iter_time=4.399, forward_time=0.169, loss_ctc=76.666, loss_att=57.348, acc=0.713, loss=63.143, backward_time=1.040, grad_norm=154.498, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=4.992e-05, train_time=11.617 +[gpub001:0/64] 2023-07-15 05:25:16,028 (trainer:732) INFO: 52epoch:train:1701-1800batch: iter_time=1.181e-04, forward_time=0.146, loss_ctc=68.875, loss_att=53.803, acc=0.711, loss=58.325, backward_time=1.031, grad_norm=128.396, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.992e-05, train_time=2.738 +[gpub001:0/64] 2023-07-15 05:27:31,940 (trainer:732) INFO: 52epoch:train:1801-1900batch: iter_time=1.251e-04, forward_time=0.146, loss_ctc=72.358, loss_att=52.034, acc=0.719, loss=58.132, backward_time=1.028, grad_norm=128.456, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.991e-05, train_time=2.718 +[gpub001:0/64] 2023-07-15 05:29:52,478 (trainer:732) INFO: 52epoch:train:1901-2000batch: iter_time=1.215e-04, forward_time=0.147, loss_ctc=74.207, loss_att=56.605, acc=0.713, loss=61.886, backward_time=1.047, grad_norm=146.145, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.991e-05, train_time=2.811 +[gpub001:0/64] 2023-07-15 05:32:13,571 (trainer:732) INFO: 52epoch:train:2001-2100batch: iter_time=1.101e-04, forward_time=0.146, loss_ctc=60.906, loss_att=47.967, acc=0.726, loss=51.849, backward_time=1.035, grad_norm=150.781, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.990e-05, train_time=2.822 +[gpub001:0/64] 2023-07-15 05:34:34,015 (trainer:732) INFO: 52epoch:train:2101-2200batch: iter_time=1.172e-04, forward_time=0.146, loss_ctc=59.010, loss_att=46.764, acc=0.727, loss=50.438, backward_time=1.032, grad_norm=205.576, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.990e-05, train_time=2.809 +[gpub001:0/64] 2023-07-15 05:37:07,034 (trainer:732) INFO: 52epoch:train:2201-2300batch: iter_time=0.005, forward_time=0.207, loss_ctc=72.052, loss_att=58.747, acc=0.714, loss=62.738, backward_time=1.065, grad_norm=153.377, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.212, optim0_lr0=4.989e-05, train_time=3.058 +[gpub001:0/64] 2023-07-15 05:39:24,955 (trainer:732) INFO: 
52epoch:train:2301-2400batch: iter_time=1.118e-04, forward_time=0.148, loss_ctc=77.542, loss_att=63.924, acc=0.722, loss=68.010, backward_time=1.031, grad_norm=138.062, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.989e-05, train_time=2.760 +[gpub001:0/64] 2023-07-15 05:41:45,240 (trainer:732) INFO: 52epoch:train:2401-2500batch: iter_time=1.128e-04, forward_time=0.144, loss_ctc=69.658, loss_att=49.632, acc=0.723, loss=55.639, backward_time=1.028, grad_norm=121.430, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.988e-05, train_time=2.805 +[gpub001:0/64] 2023-07-15 05:41:58,122 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub001:0/64] 2023-07-15 05:42:16,233 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 05:42:19,646 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 05:42:19,646 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub001:0/64] 2023-07-15 05:42:19,652 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 05:47:06,685 (trainer:732) INFO: 52epoch:train:2501-2600batch: iter_time=1.721, forward_time=0.145, loss_ctc=68.445, loss_att=51.045, acc=0.711, loss=56.265, backward_time=1.045, grad_norm=122.585, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.988e-05, train_time=6.429 +[gpub001:0/64] 2023-07-15 05:49:23,184 (trainer:732) INFO: 52epoch:train:2601-2700batch: iter_time=1.130e-04, forward_time=0.147, loss_ctc=68.564, loss_att=53.136, acc=0.713, loss=57.764, backward_time=1.031, grad_norm=117.186, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.987e-05, train_time=2.730 +[gpub001:0/64] 2023-07-15 05:51:38,947 (trainer:732) INFO: 52epoch:train:2701-2800batch: iter_time=1.209e-04, forward_time=0.145, loss_ctc=68.961, loss_att=48.677, acc=0.721, loss=54.762, backward_time=1.027, grad_norm=134.473, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.987e-05, train_time=2.715 +[gpub001:0/64] 2023-07-15 05:53:54,877 (trainer:732) INFO: 52epoch:train:2801-2900batch: iter_time=1.247e-04, forward_time=0.146, loss_ctc=77.652, loss_att=61.174, acc=0.707, loss=66.117, backward_time=1.028, grad_norm=169.753, clip=100.000, loss_scale=3.829e+32, optim_step_time=0.181, optim0_lr0=4.986e-05, train_time=2.718 +[gpub001:0/64] 2023-07-15 05:56:10,325 (trainer:732) INFO: 52epoch:train:2901-3000batch: iter_time=1.269e-04, forward_time=0.146, loss_ctc=59.198, loss_att=43.540, acc=0.729, loss=48.237, backward_time=1.026, grad_norm=130.238, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=4.986e-05, train_time=2.709 +[gpub001:0/64] 2023-07-15 05:58:26,241 (trainer:732) INFO: 52epoch:train:3001-3100batch: iter_time=1.275e-04, forward_time=0.146, loss_ctc=61.246, loss_att=47.337, acc=0.732, loss=51.510, backward_time=1.029, grad_norm=113.002, clip=100.000, 
loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=4.985e-05, train_time=2.718 +[gpub001:0/64] 2023-07-15 06:00:42,516 (trainer:732) INFO: 52epoch:train:3101-3200batch: iter_time=1.293e-04, forward_time=0.147, loss_ctc=73.840, loss_att=62.268, acc=0.715, loss=65.740, backward_time=1.030, grad_norm=132.126, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=4.985e-05, train_time=2.725 +[gpub001:0/64] 2023-07-15 06:02:01,422 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub001:0/64] 2023-07-15 06:02:58,632 (trainer:732) INFO: 52epoch:train:3201-3300batch: iter_time=1.225e-04, forward_time=0.147, loss_ctc=72.796, loss_att=59.618, acc=0.723, loss=63.572, backward_time=1.031, grad_norm=118.588, clip=100.000, loss_scale=5.100e+32, optim_step_time=0.182, optim0_lr0=4.984e-05, train_time=2.722 +[gpub001:0/64] 2023-07-15 06:03:46,609 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub001:0/64] 2023-07-15 06:04:04,946 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 06:04:08,402 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 06:04:08,402 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub001:0/64] 2023-07-15 06:04:08,408 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 06:08:26,592 (trainer:732) INFO: 52epoch:train:3301-3400batch: iter_time=1.265, forward_time=0.146, loss_ctc=67.963, loss_att=50.761, acc=0.709, loss=55.921, backward_time=1.042, grad_norm=138.942, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.984e-05, train_time=6.559 +[gpub001:0/64] 2023-07-15 06:10:43,347 (trainer:732) INFO: 52epoch:train:3401-3500batch: iter_time=1.063e-04, forward_time=0.145, loss_ctc=68.987, loss_att=57.616, acc=0.701, loss=61.027, backward_time=1.030, grad_norm=121.698, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.983e-05, train_time=2.735 +[gpub001:0/64] 2023-07-15 06:12:59,043 (trainer:732) INFO: 52epoch:train:3501-3600batch: iter_time=1.163e-04, forward_time=0.145, loss_ctc=70.640, loss_att=49.445, acc=0.720, loss=55.804, backward_time=1.027, grad_norm=163.147, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.983e-05, train_time=2.714 +[gpub001:0/64] 2023-07-15 06:15:17,290 (trainer:732) INFO: 52epoch:train:3601-3700batch: iter_time=1.213e-04, forward_time=0.146, loss_ctc=72.309, loss_att=57.367, acc=0.701, loss=61.849, backward_time=1.036, grad_norm=192.071, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.982e-05, train_time=2.765 +[gpub001:0/64] 2023-07-15 06:17:32,908 (trainer:732) INFO: 52epoch:train:3701-3800batch: iter_time=1.242e-04, forward_time=0.146, loss_ctc=56.135, loss_att=43.432, acc=0.725, loss=47.242, backward_time=1.027, grad_norm=120.250, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.982e-05, train_time=2.712 
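# --- Aside: the recurring "Building Nth iter-factory" blocks -----------------
# The training data was pre-split into 12 shards (splits12/.../split.K), and
# each "iter-factory" build stands up a fresh iterator over one shard; within
# epoch 52 the shards arrive in shuffled order (split.7, .9, .11, .1, .8, ...).
# The first 100-batch window after each build absorbs the construction cost,
# which is why iter_time jumps from ~1e-4 s to seconds and train_time spikes
# to 6-11 vs. the steady ~2.7 there. Illustrative sketch of such a shard
# rotation (names and shuffling details are assumptions, not ESPnet code):

import random
from typing import Iterator, List

def shard_order(num_shards: int, epoch: int, seed: int = 0) -> List[int]:
    """Deterministic per-epoch shuffle of shard indices."""
    order = list(range(num_shards))
    random.Random(seed + epoch).shuffle(order)
    return order

def epoch_shards(num_shards: int, epoch: int) -> Iterator[str]:
    for k in shard_order(num_shards, epoch):
        yield f"splits12/wav.scp/split.{k}"  # a new iterator is built here
# ------------------------------------------------------------------------------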
+[gpub001:0/64] 2023-07-15 06:19:48,547 (trainer:732) INFO: 52epoch:train:3801-3900batch: iter_time=1.126e-04, forward_time=0.145, loss_ctc=64.984, loss_att=53.140, acc=0.717, loss=56.693, backward_time=1.026, grad_norm=124.613, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.981e-05, train_time=2.713 +[gpub001:0/64] 2023-07-15 06:22:04,604 (trainer:732) INFO: 52epoch:train:3901-4000batch: iter_time=1.046e-04, forward_time=0.146, loss_ctc=75.460, loss_att=63.239, acc=0.704, loss=66.905, backward_time=1.030, grad_norm=124.785, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.981e-05, train_time=2.721 +[gpub001:0/64] 2023-07-15 06:24:20,473 (trainer:732) INFO: 52epoch:train:4001-4100batch: iter_time=1.162e-04, forward_time=0.145, loss_ctc=68.466, loss_att=55.208, acc=0.726, loss=59.186, backward_time=1.028, grad_norm=118.649, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.980e-05, train_time=2.717 +[gpub001:0/64] 2023-07-15 06:25:55,117 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub001:0/64] 2023-07-15 06:26:13,418 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 06:26:16,916 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 06:26:16,916 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub001:0/64] 2023-07-15 06:26:16,923 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 06:30:23,969 (trainer:732) INFO: 52epoch:train:4101-4200batch: iter_time=1.316, forward_time=0.182, loss_ctc=70.188, loss_att=50.115, acc=0.713, loss=56.137, backward_time=1.038, grad_norm=155.015, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=4.980e-05, train_time=7.268 +[gpub001:0/64] 2023-07-15 06:32:52,217 (trainer:732) INFO: 52epoch:train:4201-4300batch: iter_time=1.130e-04, forward_time=0.147, loss_ctc=69.316, loss_att=51.445, acc=0.717, loss=56.806, backward_time=1.043, grad_norm=137.794, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.979e-05, train_time=2.966 +[gpub001:0/64] 2023-07-15 06:35:08,342 (trainer:732) INFO: 52epoch:train:4301-4400batch: iter_time=1.201e-04, forward_time=0.146, loss_ctc=72.977, loss_att=57.668, acc=0.705, loss=62.261, backward_time=1.029, grad_norm=141.607, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.979e-05, train_time=2.722 +[gpub001:0/64] 2023-07-15 06:37:26,814 (trainer:732) INFO: 52epoch:train:4401-4500batch: iter_time=1.139e-04, forward_time=0.146, loss_ctc=63.643, loss_att=45.419, acc=0.719, loss=50.886, backward_time=1.056, grad_norm=113.299, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.978e-05, train_time=2.769 +[gpub001:0/64] 2023-07-15 06:39:57,698 (trainer:732) INFO: 52epoch:train:4501-4600batch: iter_time=1.134e-04, forward_time=0.147, loss_ctc=75.027, loss_att=58.300, acc=0.701, loss=63.318, 
backward_time=1.043, grad_norm=180.474, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.978e-05, train_time=3.017 +[gpub001:0/64] 2023-07-15 06:42:13,491 (trainer:732) INFO: 52epoch:train:4601-4700batch: iter_time=1.131e-04, forward_time=0.147, loss_ctc=56.007, loss_att=43.374, acc=0.727, loss=47.164, backward_time=1.027, grad_norm=140.554, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.977e-05, train_time=2.716 +[gpub001:0/64] 2023-07-15 06:44:29,393 (trainer:732) INFO: 52epoch:train:4701-4800batch: iter_time=1.151e-04, forward_time=0.147, loss_ctc=67.817, loss_att=52.699, acc=0.717, loss=57.235, backward_time=1.029, grad_norm=125.462, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.977e-05, train_time=2.718 +[gpub001:0/64] 2023-07-15 06:46:45,318 (trainer:732) INFO: 52epoch:train:4801-4900batch: iter_time=1.277e-04, forward_time=0.146, loss_ctc=73.382, loss_att=62.579, acc=0.709, loss=65.820, backward_time=1.029, grad_norm=138.062, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.976e-05, train_time=2.718 +[gpub001:0/64] 2023-07-15 06:49:01,133 (trainer:732) INFO: 52epoch:train:4901-5000batch: iter_time=1.282e-04, forward_time=0.148, loss_ctc=67.029, loss_att=51.868, acc=0.723, loss=56.416, backward_time=1.028, grad_norm=126.438, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.976e-05, train_time=2.716 +[gpub001:0/64] 2023-07-15 06:49:16,564 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub001:0/64] 2023-07-15 06:49:34,812 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 06:49:38,218 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 06:49:38,218 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub001:0/64] 2023-07-15 06:49:38,224 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 06:55:32,657 (trainer:732) INFO: 52epoch:train:5001-5100batch: iter_time=2.462, forward_time=0.167, loss_ctc=69.941, loss_att=52.394, acc=0.707, loss=57.658, backward_time=1.039, grad_norm=123.555, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.975e-05, train_time=7.830 +[gpub001:0/64] 2023-07-15 06:57:48,838 (trainer:732) INFO: 52epoch:train:5101-5200batch: iter_time=1.244e-04, forward_time=0.145, loss_ctc=67.509, loss_att=53.022, acc=0.712, loss=57.368, backward_time=1.027, grad_norm=124.915, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.975e-05, train_time=2.723 +[gpub001:0/64] 2023-07-15 07:00:08,648 (trainer:732) INFO: 52epoch:train:5201-5300batch: iter_time=1.262e-04, forward_time=0.145, loss_ctc=66.544, loss_att=46.869, acc=0.728, loss=52.771, backward_time=1.037, grad_norm=123.961, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.974e-05, train_time=2.796 +[gpub001:0/64] 2023-07-15 07:02:28,465 (trainer:732) INFO: 
52epoch:train:5301-5400batch: iter_time=1.234e-04, forward_time=0.147, loss_ctc=76.176, loss_att=59.538, acc=0.700, loss=64.529, backward_time=1.041, grad_norm=207.376, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.974e-05, train_time=2.796 +[gpub001:0/64] 2023-07-15 07:04:57,605 (trainer:732) INFO: 52epoch:train:5401-5500batch: iter_time=1.271e-04, forward_time=0.145, loss_ctc=59.021, loss_att=44.631, acc=0.724, loss=48.948, backward_time=1.045, grad_norm=134.157, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.973e-05, train_time=2.983 +[gpub001:0/64] 2023-07-15 07:07:19,312 (trainer:732) INFO: 52epoch:train:5501-5600batch: iter_time=1.223e-04, forward_time=0.145, loss_ctc=60.113, loss_att=46.606, acc=0.727, loss=50.658, backward_time=1.044, grad_norm=107.221, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.973e-05, train_time=2.834 +[gpub001:0/64] 2023-07-15 07:09:42,242 (trainer:732) INFO: 52epoch:train:5601-5700batch: iter_time=1.310e-04, forward_time=0.146, loss_ctc=74.213, loss_att=63.764, acc=0.702, loss=66.899, backward_time=1.047, grad_norm=124.188, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.972e-05, train_time=2.858 +[gpub001:0/64] 2023-07-15 07:11:58,361 (trainer:732) INFO: 52epoch:train:5701-5800batch: iter_time=1.328e-04, forward_time=0.146, loss_ctc=72.924, loss_att=58.764, acc=0.720, loss=63.012, backward_time=1.029, grad_norm=109.159, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.972e-05, train_time=2.722 +[gpub001:0/64] 2023-07-15 07:12:46,518 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub001:0/64] 2023-07-15 07:13:04,771 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 07:13:08,290 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 07:13:08,290 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub001:0/64] 2023-07-15 07:13:08,297 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 07:17:17,109 (trainer:732) INFO: 52epoch:train:5801-5900batch: iter_time=1.320, forward_time=0.194, loss_ctc=67.235, loss_att=50.707, acc=0.707, loss=55.665, backward_time=1.041, grad_norm=126.723, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=4.971e-05, train_time=6.375 +[gpub001:0/64] 2023-07-15 07:19:34,139 (trainer:732) INFO: 52epoch:train:5901-6000batch: iter_time=1.124e-04, forward_time=0.146, loss_ctc=67.571, loss_att=55.834, acc=0.707, loss=59.355, backward_time=1.028, grad_norm=119.801, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.971e-05, train_time=2.740 +[gpub001:0/64] 2023-07-15 07:21:50,446 (trainer:732) INFO: 52epoch:train:6001-6100batch: iter_time=1.117e-04, forward_time=0.145, loss_ctc=69.750, loss_att=48.735, acc=0.721, loss=55.039, backward_time=1.028, grad_norm=137.401, clip=100.000, 
loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.970e-05, train_time=2.726 +[gpub001:0/64] 2023-07-15 07:24:05,901 (trainer:732) INFO: 52epoch:train:6101-6200batch: iter_time=1.260e-04, forward_time=0.143, loss_ctc=74.044, loss_att=56.325, acc=0.704, loss=61.641, backward_time=1.026, grad_norm=136.136, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.970e-05, train_time=2.709 +[gpub001:0/64] 2023-07-15 07:26:21,636 (trainer:732) INFO: 52epoch:train:6201-6300batch: iter_time=1.212e-04, forward_time=0.145, loss_ctc=55.855, loss_att=43.352, acc=0.724, loss=47.103, backward_time=1.028, grad_norm=115.672, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.969e-05, train_time=2.714 +[gpub001:0/64] 2023-07-15 07:28:42,756 (trainer:732) INFO: 52epoch:train:6301-6400batch: iter_time=1.105e-04, forward_time=0.145, loss_ctc=62.969, loss_att=51.098, acc=0.723, loss=54.659, backward_time=1.046, grad_norm=130.164, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.969e-05, train_time=2.822 +[gpub001:0/64] 2023-07-15 07:31:00,939 (trainer:732) INFO: 52epoch:train:6401-6500batch: iter_time=1.146e-04, forward_time=0.147, loss_ctc=75.551, loss_att=63.770, acc=0.701, loss=67.304, backward_time=1.032, grad_norm=129.815, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.968e-05, train_time=2.763 +[gpub001:0/64] 2023-07-15 07:33:28,577 (trainer:732) INFO: 52epoch:train:6501-6600batch: iter_time=1.292e-04, forward_time=0.146, loss_ctc=68.149, loss_att=54.330, acc=0.725, loss=58.476, backward_time=1.036, grad_norm=113.777, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.968e-05, train_time=2.953 +[gpub001:0/64] 2023-07-15 07:35:03,598 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
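# --- Aside: the identical sampler summaries ----------------------------------
# Every shard reports UnsortedBatchSampler(N-batch=37994, batch_size=128) and
# "mean=128.0, min=128, max=129", i.e. roughly 37994 * 128 ~= 4.86 M
# utterances per shard, with the division remainder spread over a few batches
# (hence max=129). Below is a splitting rule that reproduces the logged
# summary; whether the sampler does exactly this is not shown in the log, and
# the exact shard size is not logged either, so 50 is a made-up remainder:

def batch_sizes(num_utts: int, batch_size: int = 128) -> list:
    n_batch = num_utts // batch_size   # 37994 in this log
    rem = num_utts % batch_size        # handed out one-per-batch up front
    return [batch_size + (1 if i < rem else 0) for i in range(n_batch)]

sizes = batch_sizes(37994 * 128 + 50)
assert len(sizes) == 37994 and min(sizes) == 128 and max(sizes) == 129
# ------------------------------------------------------------------------------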
+[gpub001:0/64] 2023-07-15 07:35:21,711 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 07:35:25,095 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 07:35:25,095 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub001:0/64] 2023-07-15 07:35:25,101 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 07:39:15,653 (trainer:732) INFO: 52epoch:train:6601-6700batch: iter_time=1.307, forward_time=0.166, loss_ctc=70.728, loss_att=49.460, acc=0.717, loss=55.840, backward_time=1.039, grad_norm=123.671, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.967e-05, train_time=6.941 +[gpub001:0/64] 2023-07-15 07:41:33,326 (trainer:732) INFO: 52epoch:train:6701-6800batch: iter_time=1.170e-04, forward_time=0.145, loss_ctc=68.471, loss_att=51.019, acc=0.714, loss=56.255, backward_time=1.034, grad_norm=118.156, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.967e-05, train_time=2.754 +[gpub001:0/64] 2023-07-15 07:43:49,511 (trainer:732) INFO: 52epoch:train:6801-6900batch: iter_time=1.138e-04, forward_time=0.145, loss_ctc=72.102, loss_att=56.769, acc=0.710, loss=61.369, backward_time=1.028, grad_norm=153.381, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.966e-05, train_time=2.723 +[gpub001:0/64] 2023-07-15 07:46:08,170 (trainer:732) INFO: 52epoch:train:6901-7000batch: iter_time=1.145e-04, forward_time=0.145, loss_ctc=62.933, loss_att=45.330, acc=0.724, loss=50.611, backward_time=1.039, grad_norm=115.571, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.966e-05, train_time=2.773 +[gpub001:0/64] 2023-07-15 07:48:28,609 (trainer:732) INFO: 52epoch:train:7001-7100batch: iter_time=1.199e-04, forward_time=0.147, loss_ctc=76.028, loss_att=57.030, acc=0.707, loss=62.729, backward_time=1.034, grad_norm=162.178, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.965e-05, train_time=2.809 +[gpub001:0/64] 2023-07-15 07:50:46,730 (trainer:732) INFO: 52epoch:train:7101-7200batch: iter_time=1.191e-04, forward_time=0.146, loss_ctc=56.737, loss_att=43.740, acc=0.729, loss=47.639, backward_time=1.031, grad_norm=117.600, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.965e-05, train_time=2.762 +[gpub001:0/64] 2023-07-15 07:53:15,548 (trainer:732) INFO: 52epoch:train:7201-7300batch: iter_time=1.316e-04, forward_time=0.146, loss_ctc=67.374, loss_att=53.427, acc=0.716, loss=57.611, backward_time=1.037, grad_norm=133.942, clip=100.000, loss_scale=4.608e+32, optim_step_time=0.182, optim0_lr0=4.964e-05, train_time=2.976 +[gpub001:0/64] 2023-07-15 07:55:26,256 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. 
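# --- Aside: grad_norm vs. clip=100.000 ---------------------------------------
# clip=100.000 in every record is the max-norm used for gradient clipping,
# while the logged grad_norm frequently exceeds it (e.g. 205.576, 207.376):
# the value logged is the total norm *before* clipping, after which gradients
# are rescaled down to norm 100 for the update. This matches the semantics of
# torch.nn.utils.clip_grad_norm_, which returns the pre-clip norm. Sketch:

import torch

def clip_and_log(model: torch.nn.Module, max_norm: float = 100.0) -> float:
    grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
    return float(grad_norm)  # the number a trainer would report as grad_norm

lin = torch.nn.Linear(4, 4)
lin(torch.randn(8, 4)).sum().backward()
print(clip_and_log(lin))     # pre-clip norm; lin's grads now have norm <= 100
# ------------------------------------------------------------------------------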
+[gpub001:0/64] 2023-07-15 07:55:39,865 (trainer:732) INFO: 52epoch:train:7301-7400batch: iter_time=1.232e-04, forward_time=0.148, loss_ctc=73.022, loss_att=61.882, acc=0.708, loss=65.224, backward_time=1.056, grad_norm=127.732, clip=100.000, loss_scale=6.159e+32, optim_step_time=0.182, optim0_lr0=4.964e-05, train_time=2.886 +[gpub001:0/64] 2023-07-15 07:58:00,543 (trainer:732) INFO: 52epoch:train:7401-7500batch: iter_time=1.102e-04, forward_time=0.146, loss_ctc=65.887, loss_att=51.810, acc=0.721, loss=56.033, backward_time=1.031, grad_norm=112.643, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.963e-05, train_time=2.813 +[gpub001:0/64] 2023-07-15 07:58:11,614 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpub001:0/64] 2023-07-15 07:58:29,689 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 07:58:33,138 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 07:58:33,138 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub001:0/64] 2023-07-15 07:58:33,144 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 08:04:38,481 (trainer:732) INFO: 52epoch:train:7501-7600batch: iter_time=2.524, forward_time=0.173, loss_ctc=68.274, loss_att=52.729, acc=0.716, loss=57.393, backward_time=1.043, grad_norm=122.527, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.963e-05, train_time=7.958 +[gpub001:0/64] 2023-07-15 08:06:55,378 (trainer:732) INFO: 52epoch:train:7601-7700batch: iter_time=1.197e-04, forward_time=0.150, loss_ctc=66.701, loss_att=51.740, acc=0.717, loss=56.228, backward_time=1.030, grad_norm=132.874, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.962e-05, train_time=2.738 +[gpub001:0/64] 2023-07-15 08:09:14,728 (trainer:732) INFO: 52epoch:train:7701-7800batch: iter_time=1.194e-04, forward_time=0.163, loss_ctc=66.481, loss_att=47.413, acc=0.730, loss=53.133, backward_time=1.030, grad_norm=123.853, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.201, optim0_lr0=4.962e-05, train_time=2.787 +[gpub001:0/64] 2023-07-15 08:11:09,763 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. 
+[gpub001:0/64] 2023-07-15 08:11:31,528 (trainer:732) INFO: 52epoch:train:7801-7900batch: iter_time=1.227e-04, forward_time=0.147, loss_ctc=76.969, loss_att=61.099, acc=0.712, loss=65.860, backward_time=1.030, grad_norm=147.788, clip=100.000, loss_scale=2.980e+32, optim_step_time=0.182, optim0_lr0=4.961e-05, train_time=2.736 +[gpub001:0/64] 2023-07-15 08:13:47,018 (trainer:732) INFO: 52epoch:train:7901-8000batch: iter_time=1.201e-04, forward_time=0.146, loss_ctc=58.050, loss_att=42.970, acc=0.735, loss=47.494, backward_time=1.025, grad_norm=116.421, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.961e-05, train_time=2.710 +[gpub001:0/64] 2023-07-15 08:16:02,954 (trainer:732) INFO: 52epoch:train:8001-8100batch: iter_time=1.129e-04, forward_time=0.146, loss_ctc=60.651, loss_att=47.371, acc=0.734, loss=51.355, backward_time=1.027, grad_norm=123.406, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.960e-05, train_time=2.718 +[gpub001:0/64] 2023-07-15 08:18:19,062 (trainer:732) INFO: 52epoch:train:8101-8200batch: iter_time=1.261e-04, forward_time=0.147, loss_ctc=72.709, loss_att=62.133, acc=0.722, loss=65.306, backward_time=1.028, grad_norm=112.891, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.960e-05, train_time=2.722 +[gpub001:0/64] 2023-07-15 08:20:35,779 (trainer:732) INFO: 52epoch:train:8201-8300batch: iter_time=1.170e-04, forward_time=0.146, loss_ctc=72.592, loss_att=59.622, acc=0.726, loss=63.513, backward_time=1.028, grad_norm=112.657, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.960e-05, train_time=2.734 +[gpub001:0/64] 2023-07-15 08:21:28,081 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub001:0/64] 2023-07-15 08:21:46,305 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 08:21:50,018 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 08:21:50,018 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub001:0/64] 2023-07-15 08:21:50,025 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 08:27:03,659 (trainer:732) INFO: 52epoch:train:8301-8400batch: iter_time=1.383, forward_time=0.161, loss_ctc=63.582, loss_att=46.401, acc=0.711, loss=51.555, backward_time=1.043, grad_norm=110.495, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.959e-05, train_time=7.757 +[gpub001:0/64] 2023-07-15 08:29:20,503 (trainer:732) INFO: 52epoch:train:8401-8500batch: iter_time=1.169e-04, forward_time=0.146, loss_ctc=71.706, loss_att=52.790, acc=0.718, loss=58.465, backward_time=1.029, grad_norm=132.484, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.959e-05, train_time=2.737 +[gpub001:0/64] 2023-07-15 08:31:36,508 (trainer:732) INFO: 52epoch:train:8501-8600batch: iter_time=1.157e-04, forward_time=0.146, loss_ctc=69.582, loss_att=53.043, acc=0.721, loss=58.005, 
backward_time=1.030, grad_norm=129.666, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.958e-05, train_time=2.720 +[gpub001:0/64] 2023-07-15 08:33:53,300 (trainer:732) INFO: 52epoch:train:8601-8700batch: iter_time=1.138e-04, forward_time=0.146, loss_ctc=63.441, loss_att=46.306, acc=0.719, loss=51.447, backward_time=1.030, grad_norm=134.435, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.958e-05, train_time=2.736 +[gpub001:0/64] 2023-07-15 08:36:12,547 (trainer:732) INFO: 52epoch:train:8701-8800batch: iter_time=1.142e-04, forward_time=0.146, loss_ctc=72.367, loss_att=57.240, acc=0.706, loss=61.778, backward_time=1.040, grad_norm=144.562, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.957e-05, train_time=2.785 +[gpub001:0/64] 2023-07-15 08:38:28,158 (trainer:732) INFO: 52epoch:train:8801-8900batch: iter_time=1.144e-04, forward_time=0.146, loss_ctc=54.536, loss_att=41.153, acc=0.739, loss=45.168, backward_time=1.028, grad_norm=114.551, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.957e-05, train_time=2.712 +[gpub001:0/64] 2023-07-15 08:40:58,526 (trainer:732) INFO: 52epoch:train:8901-9000batch: iter_time=1.149e-04, forward_time=0.147, loss_ctc=69.535, loss_att=56.601, acc=0.709, loss=60.481, backward_time=1.044, grad_norm=139.440, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.956e-05, train_time=3.007 +[gpub001:0/64] 2023-07-15 08:43:14,542 (trainer:732) INFO: 52epoch:train:9001-9100batch: iter_time=1.258e-04, forward_time=0.146, loss_ctc=76.360, loss_att=63.389, acc=0.715, loss=67.281, backward_time=1.029, grad_norm=114.369, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.956e-05, train_time=2.720 +[gpub001:0/64] 2023-07-15 08:44:48,439 (multiple_iter_factory:32) INFO: Building 11th iter-factory... 
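# --- Aside: the slowly decaying optim0_lr0 -----------------------------------
# optim0_lr0 drifts from 5.020e-05 down to ~4.95e-05 across this section. With
# the base lr 2.5e-4 and 10k warmup steps encoded in the experiment name, this
# matches an inverse-square-root warmup schedule of the form
#     lr(step) = base_lr * warmup**0.5 * min(step**-0.5, step * warmup**-1.5),
# which past warmup decays as base_lr * sqrt(warmup / step). Back-solving,
# lr = 5.000e-05 (end of epoch 51 above) corresponds to ~250k optimizer steps;
# how that maps onto total_count=480000 depends on gradient accumulation,
# which this log does not show. The configured scheduler lives in the YAML,
# not the log; the function below is a sketch of that formula:

def warmup_lr(step: int, base_lr: float = 2.5e-4, warmup: int = 10_000) -> float:
    return base_lr * warmup**0.5 * min(step**-0.5, step * warmup**-1.5)

assert abs(warmup_lr(250_000) - 5.000e-5) < 1e-9
# ------------------------------------------------------------------------------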
+[gpub001:0/64] 2023-07-15 08:45:06,514 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 08:45:09,993 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 08:45:09,993 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub001:0/64] 2023-07-15 08:45:09,999 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 08:50:38,705 (trainer:732) INFO: 52epoch:train:9101-9200batch: iter_time=1.386, forward_time=0.204, loss_ctc=64.693, loss_att=46.910, acc=0.724, loss=52.245, backward_time=1.042, grad_norm=132.581, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.186, optim0_lr0=4.955e-05, train_time=8.882
+[gpub001:0/64] 2023-07-15 08:52:56,341 (trainer:732) INFO: 52epoch:train:9201-9300batch: iter_time=1.218e-04, forward_time=0.149, loss_ctc=67.583, loss_att=50.815, acc=0.725, loss=55.846, backward_time=1.032, grad_norm=120.075, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.955e-05, train_time=2.753
+[gpub001:0/64] 2023-07-15 08:55:13,695 (trainer:732) INFO: 52epoch:train:9301-9400batch: iter_time=1.130e-04, forward_time=0.149, loss_ctc=72.507, loss_att=57.099, acc=0.717, loss=61.721, backward_time=1.030, grad_norm=148.100, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.954e-05, train_time=2.747
+[gpub001:0/64] 2023-07-15 08:57:29,816 (trainer:732) INFO: 52epoch:train:9401-9500batch: iter_time=1.235e-04, forward_time=0.147, loss_ctc=61.723, loss_att=44.944, acc=0.730, loss=49.978, backward_time=1.028, grad_norm=107.006, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.954e-05, train_time=2.722
+[gpub001:0/64] 2023-07-15 08:59:46,404 (trainer:732) INFO: 52epoch:train:9501-9600batch: iter_time=1.182e-04, forward_time=0.147, loss_ctc=73.882, loss_att=57.515, acc=0.715, loss=62.425, backward_time=1.031, grad_norm=134.396, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.953e-05, train_time=2.732
+[gpub001:0/64] 2023-07-15 09:02:02,343 (trainer:732) INFO: 52epoch:train:9601-9700batch: iter_time=1.242e-04, forward_time=0.145, loss_ctc=57.834, loss_att=43.892, acc=0.733, loss=48.075, backward_time=1.027, grad_norm=105.315, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.953e-05, train_time=2.719
+[gpub001:0/64] 2023-07-15 09:04:18,628 (trainer:732) INFO: 52epoch:train:9701-9800batch: iter_time=1.178e-04, forward_time=0.145, loss_ctc=69.046, loss_att=55.521, acc=0.724, loss=59.578, backward_time=1.028, grad_norm=134.539, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.952e-05, train_time=2.725
+[gpub001:0/64] 2023-07-15 09:06:40,864 (trainer:732) INFO: 52epoch:train:9801-9900batch: iter_time=1.086e-04, forward_time=0.146, loss_ctc=71.846, loss_att=59.131, acc=0.721, loss=62.945, backward_time=1.034, grad_norm=118.400, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.952e-05, train_time=2.844
+[gpub001:0/64] 2023-07-15 09:08:57,505 (trainer:732) INFO: 52epoch:train:9901-10000batch: iter_time=1.061e-04, forward_time=0.148, loss_ctc=67.461, loss_att=52.187, acc=0.727, loss=56.769, backward_time=1.031, grad_norm=132.078, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.951e-05, train_time=2.733
+[gpub001:0/64] 2023-07-15 09:23:19,047 (trainer:338) INFO: 52epoch results: [train] iter_time=0.232, forward_time=0.151, loss_ctc=68.405, loss_att=53.118, acc=0.717, loss=57.704, backward_time=1.034, grad_norm=132.000, clip=100.000, loss_scale=3.065e+32, optim_step_time=0.182, optim0_lr0=4.975e-05, train_time=3.365, time=4 hours, 40 minutes and 41.9 seconds, total_count=490000, gpu_max_cached_mem_GB=37.635, [valid] loss_ctc=43.162, cer_ctc=0.252, loss_att=38.727, acc=0.677, cer=0.407, wer=0.996, loss=40.058, time=8 minutes and 9.5 seconds, total_count=50094, gpu_max_cached_mem_GB=37.635, [att_plot] time=5 minutes and 54.24 seconds, total_count=0, gpu_max_cached_mem_GB=37.635
+[gpub001:0/64] 2023-07-15 09:23:36,296 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub001:0/64] 2023-07-15 09:23:36,354 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/47epoch.pth
+[gpub001:0/64] 2023-07-15 09:23:36,354 (trainer:272) INFO: 53/60epoch started. Estimated time to finish: 1 day, 15 hours and 44 minutes
+[gpub001:0/64] 2023-07-15 09:23:37,760 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub001:0/64] 2023-07-15 09:23:55,704 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 09:23:59,027 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 09:23:59,027 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub001:0/64] 2023-07-15 09:23:59,041 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 09:31:30,451 (trainer:732) INFO: 53epoch:train:1-100batch: iter_time=3.311, forward_time=0.176, loss_ctc=77.249, loss_att=58.785, acc=0.708, loss=64.324, backward_time=1.043, grad_norm=155.630, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.185, optim0_lr0=4.951e-05, train_time=9.474
+[gpub001:0/64] 2023-07-15 09:33:55,969 (trainer:732) INFO: 53epoch:train:101-200batch: iter_time=1.288e-04, forward_time=0.189, loss_ctc=71.700, loss_att=52.162, acc=0.707, loss=58.023, backward_time=1.035, grad_norm=122.394, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=4.950e-05, train_time=2.911
+[gpub001:0/64] 2023-07-15 09:36:26,554 (trainer:732) INFO: 53epoch:train:201-300batch: iter_time=0.001, forward_time=0.236, loss_ctc=66.496, loss_att=46.521, acc=0.739, loss=52.513, backward_time=1.048, grad_norm=117.834, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.186, optim0_lr0=4.950e-05, train_time=3.011
+[gpub001:0/64] 2023-07-15 09:39:01,453 (trainer:732) INFO: 53epoch:train:301-400batch: iter_time=9.101e-04, forward_time=0.287, loss_ctc=75.211, loss_att=58.135, acc=0.711, loss=63.258, backward_time=1.053, grad_norm=108.586, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.190, optim0_lr0=4.949e-05, train_time=3.098
+[gpub001:0/64] 2023-07-15 09:41:30,212 (trainer:732) INFO: 53epoch:train:401-500batch: iter_time=3.592e-04, forward_time=0.238, loss_ctc=68.220, loss_att=49.582, acc=0.727, loss=55.173, backward_time=1.045, grad_norm=122.577, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.189, optim0_lr0=4.949e-05, train_time=2.975
+[gpub001:0/64] 2023-07-15 09:43:59,845 (trainer:732) INFO: 53epoch:train:501-600batch: iter_time=0.003, forward_time=0.238, loss_ctc=65.657, loss_att=47.631, acc=0.729, loss=53.039, backward_time=1.049, grad_norm=152.877, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.188, optim0_lr0=4.948e-05, train_time=2.992
+[gpub001:0/64] 2023-07-15 09:46:31,374 (trainer:732) INFO: 53epoch:train:601-700batch: iter_time=1.153e-04, forward_time=0.177, loss_ctc=72.494, loss_att=54.683, acc=0.710, loss=60.026, backward_time=1.055, grad_norm=148.168, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.188, optim0_lr0=4.948e-05, train_time=3.031
+[gpub001:0/64] 2023-07-15 09:49:00,984 (trainer:732) INFO: 53epoch:train:701-800batch: iter_time=6.123e-04, forward_time=0.231, loss_ctc=65.196, loss_att=47.527, acc=0.726, loss=52.828, backward_time=1.042, grad_norm=130.075, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.186, optim0_lr0=4.947e-05, train_time=2.992
+[gpub001:0/64] 2023-07-15 09:49:58,288 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpub001:0/64] 2023-07-15 09:50:16,392 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 09:50:19,748 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 09:50:19,748 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub001:0/64] 2023-07-15 09:50:19,754 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 09:58:24,759 (trainer:732) INFO: 53epoch:train:801-900batch: iter_time=4.199, forward_time=0.197, loss_ctc=70.653, loss_att=50.295, acc=0.719, loss=56.402, backward_time=1.041, grad_norm=118.018, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.947e-05, train_time=11.275
+[gpub001:0/64] 2023-07-15 10:00:41,776 (trainer:732) INFO: 53epoch:train:901-1000batch: iter_time=1.306e-04, forward_time=0.151, loss_ctc=77.224, loss_att=55.334, acc=0.705, loss=61.901, backward_time=1.032, grad_norm=132.329, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.946e-05, train_time=2.740
+[gpub001:0/64] 2023-07-15 10:02:57,402 (trainer:732) INFO: 53epoch:train:1001-1100batch: iter_time=1.317e-04, forward_time=0.148, loss_ctc=64.572, loss_att=46.014, acc=0.728, loss=51.581, backward_time=1.027, grad_norm=123.422, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.946e-05, train_time=2.712
+[gpub001:0/64] 2023-07-15 10:05:13,259 (trainer:732) INFO: 53epoch:train:1101-1200batch: iter_time=1.391e-04, forward_time=0.148, loss_ctc=73.301, loss_att=55.843, acc=0.715, loss=61.080, backward_time=1.029, grad_norm=135.140, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.945e-05, train_time=2.717
+[gpub001:0/64] 2023-07-15 10:07:28,967 (trainer:732) INFO: 53epoch:train:1201-1300batch: iter_time=1.231e-04, forward_time=0.147, loss_ctc=63.739, loss_att=47.917, acc=0.729, loss=52.664, backward_time=1.028, grad_norm=114.091, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.945e-05, train_time=2.714
+[gpub001:0/64] 2023-07-15 10:09:44,862 (trainer:732) INFO: 53epoch:train:1301-1400batch: iter_time=1.252e-04, forward_time=0.149, loss_ctc=67.179, loss_att=47.097, acc=0.730, loss=53.122, backward_time=1.028, grad_norm=116.673, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.944e-05, train_time=2.718
+[gpub001:0/64] 2023-07-15 10:12:00,515 (trainer:732) INFO: 53epoch:train:1401-1500batch: iter_time=1.255e-04, forward_time=0.148, loss_ctc=68.130, loss_att=49.836, acc=0.714, loss=55.324, backward_time=1.027, grad_norm=129.206, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.944e-05, train_time=2.713
+[gpub001:0/64] 2023-07-15 10:14:17,608 (trainer:732) INFO: 53epoch:train:1501-1600batch: iter_time=1.335e-04, forward_time=0.147, loss_ctc=67.907, loss_att=53.286, acc=0.708, loss=57.672, backward_time=1.028, grad_norm=135.533, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.943e-05, train_time=2.742
+[gpub001:0/64] 2023-07-15 10:15:56,480 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub001:0/64] 2023-07-15 10:16:14,877 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 10:16:18,381 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 10:16:18,381 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub001:0/64] 2023-07-15 10:16:18,387 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 10:20:43,071 (trainer:732) INFO: 53epoch:train:1601-1700batch: iter_time=1.340, forward_time=0.148, loss_ctc=70.666, loss_att=56.816, acc=0.715, loss=60.971, backward_time=1.036, grad_norm=162.033, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.943e-05, train_time=7.709
+[gpub001:0/64] 2023-07-15 10:22:59,760 (trainer:732) INFO: 53epoch:train:1701-1800batch: iter_time=1.258e-04, forward_time=0.147, loss_ctc=71.652, loss_att=52.533, acc=0.712, loss=58.269, backward_time=1.033, grad_norm=141.266, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.943e-05, train_time=2.734
+[gpub001:0/64] 2023-07-15 10:25:15,620 (trainer:732) INFO: 53epoch:train:1801-1900batch: iter_time=1.295e-04, forward_time=0.145, loss_ctc=76.945, loss_att=54.785, acc=0.701, loss=61.433, backward_time=1.028, grad_norm=120.255, clip=100.000, loss_scale=1.882e+32, optim_step_time=0.182, optim0_lr0=4.942e-05, train_time=2.717
+[gpub001:0/64] 2023-07-15 10:27:32,921 (trainer:732) INFO: 53epoch:train:1901-2000batch: iter_time=1.302e-04, forward_time=0.146, loss_ctc=65.557, loss_att=44.420, acc=0.733, loss=50.761, backward_time=1.026, grad_norm=170.277, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.942e-05, train_time=2.746
+[gpub001:0/64] 2023-07-15 10:29:54,896 (trainer:732) INFO: 53epoch:train:2001-2100batch: iter_time=1.297e-04, forward_time=0.146, loss_ctc=78.932, loss_att=60.005, acc=0.708, loss=65.683, backward_time=1.032, grad_norm=154.538, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.941e-05, train_time=2.839
+[gpub001:0/64] 2023-07-15 10:32:19,721 (trainer:732) INFO: 53epoch:train:2101-2200batch: iter_time=1.312e-04, forward_time=0.146, loss_ctc=65.440, loss_att=48.548, acc=0.725, loss=53.615, backward_time=1.039, grad_norm=135.394, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.941e-05, train_time=2.896
+[gpub001:0/64] 2023-07-15 10:34:37,291 (trainer:732) INFO: 53epoch:train:2201-2300batch: iter_time=1.376e-04, forward_time=0.147, loss_ctc=63.624, loss_att=44.869, acc=0.738, loss=50.495, backward_time=1.031, grad_norm=144.749, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.940e-05, train_time=2.751
+[gpub001:0/64] 2023-07-15 10:36:56,681 (trainer:732) INFO: 53epoch:train:2301-2400batch: iter_time=1.248e-04, forward_time=0.145, loss_ctc=71.643, loss_att=51.467, acc=0.712, loss=57.520, backward_time=1.028, grad_norm=180.650, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.940e-05, train_time=2.788
+[gpub001:0/64] 2023-07-15 10:39:14,003 (trainer:732) INFO: 53epoch:train:2401-2500batch: iter_time=1.187e-04, forward_time=0.146, loss_ctc=64.355, loss_att=47.519, acc=0.719, loss=52.570, backward_time=1.029, grad_norm=114.242, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.939e-05, train_time=2.746
+[gpub001:0/64] 2023-07-15 10:39:17,582 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub001:0/64] 2023-07-15 10:39:35,923 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 10:39:39,455 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 10:39:39,455 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub001:0/64] 2023-07-15 10:39:39,461 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 10:44:46,978 (trainer:732) INFO: 53epoch:train:2501-2600batch: iter_time=1.885, forward_time=0.176, loss_ctc=73.411, loss_att=53.483, acc=0.721, loss=59.461, backward_time=1.044, grad_norm=123.712, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=4.939e-05, train_time=6.659
+[gpub001:0/64] 2023-07-15 10:47:15,365 (trainer:732) INFO: 53epoch:train:2601-2700batch: iter_time=1.000e-04, forward_time=0.145, loss_ctc=75.773, loss_att=56.053, acc=0.699, loss=61.969, backward_time=1.046, grad_norm=116.565, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.938e-05, train_time=2.968
+[gpub001:0/64] 2023-07-15 10:49:45,331 (trainer:732) INFO: 53epoch:train:2701-2800batch: iter_time=1.085e-04, forward_time=0.145, loss_ctc=65.403, loss_att=45.525, acc=0.728, loss=51.488, backward_time=1.046, grad_norm=116.454, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.938e-05, train_time=2.999
+[gpub001:0/64] 2023-07-15 10:52:05,173 (trainer:732) INFO: 53epoch:train:2801-2900batch: iter_time=1.075e-04, forward_time=0.144, loss_ctc=73.294, loss_att=53.142, acc=0.721, loss=59.188, backward_time=1.038, grad_norm=148.478, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.937e-05, train_time=2.797
+[gpub001:0/64] 2023-07-15 10:54:39,743 (trainer:732) INFO: 53epoch:train:2901-3000batch: iter_time=1.066e-04, forward_time=0.144, loss_ctc=67.600, loss_att=51.971, acc=0.725, loss=56.660, backward_time=1.042, grad_norm=138.355, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.937e-05, train_time=3.091
+[gpub001:0/64] 2023-07-15 10:56:58,255 (trainer:732) INFO: 53epoch:train:3001-3100batch: iter_time=1.071e-04, forward_time=0.144, loss_ctc=65.833, loss_att=46.073, acc=0.732, loss=52.001, backward_time=1.031, grad_norm=112.000, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.936e-05, train_time=2.770
+[gpub001:0/64] 2023-07-15 10:59:19,483 (trainer:732) INFO: 53epoch:train:3101-3200batch: iter_time=1.012e-04, forward_time=0.145, loss_ctc=66.802, loss_att=49.191, acc=0.717, loss=54.475, backward_time=1.039, grad_norm=159.228, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.936e-05, train_time=2.824
+[gpub001:0/64] 2023-07-15 11:00:27,528 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub001:0/64] 2023-07-15 11:01:42,885 (trainer:732) INFO: 53epoch:train:3201-3300batch: iter_time=1.020e-04, forward_time=0.145, loss_ctc=69.338, loss_att=52.658, acc=0.720, loss=57.662, backward_time=1.040, grad_norm=112.301, clip=100.000, loss_scale=2.351e+32, optim_step_time=0.182, optim0_lr0=4.935e-05, train_time=2.868
+[gpub001:0/64] 2023-07-15 11:02:40,035 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub001:0/64] 2023-07-15 11:02:58,063 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 11:03:01,531 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 11:03:01,531 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub001:0/64] 2023-07-15 11:03:01,537 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 11:07:57,739 (trainer:732) INFO: 53epoch:train:3301-3400batch: iter_time=2.006, forward_time=0.145, loss_ctc=69.350, loss_att=48.327, acc=0.728, loss=54.634, backward_time=1.039, grad_norm=118.392, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.935e-05, train_time=7.497
+[gpub001:0/64] 2023-07-15 11:10:14,439 (trainer:732) INFO: 53epoch:train:3401-3500batch: iter_time=1.282e-04, forward_time=0.147, loss_ctc=77.528, loss_att=60.141, acc=0.707, loss=65.357, backward_time=1.032, grad_norm=137.504, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.934e-05, train_time=2.734
+[gpub001:0/64] 2023-07-15 11:12:30,025 (trainer:732) INFO: 53epoch:train:3501-3600batch: iter_time=1.138e-04, forward_time=0.145, loss_ctc=67.358, loss_att=47.997, acc=0.719, loss=53.805, backward_time=1.026, grad_norm=117.273, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.934e-05, train_time=2.711
+[gpub001:0/64] 2023-07-15 11:14:48,416 (trainer:732) INFO: 53epoch:train:3601-3700batch: iter_time=1.272e-04, forward_time=0.146, loss_ctc=73.623, loss_att=53.854, acc=0.738, loss=59.785, backward_time=1.029, grad_norm=143.444, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.933e-05, train_time=2.768
+[gpub001:0/64] 2023-07-15 11:17:21,226 (trainer:732) INFO: 53epoch:train:3701-3800batch: iter_time=1.341e-04, forward_time=0.146, loss_ctc=70.255, loss_att=50.718, acc=0.718, loss=56.579, backward_time=1.047, grad_norm=167.446, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.933e-05, train_time=3.056
+[gpub001:0/64] 2023-07-15 11:19:38,219 (trainer:732) INFO: 53epoch:train:3801-3900batch: iter_time=1.312e-04, forward_time=0.148, loss_ctc=65.971, loss_att=48.571, acc=0.738, loss=53.791, backward_time=1.029, grad_norm=126.572, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.187, optim0_lr0=4.932e-05, train_time=2.740
+[gpub001:0/64] 2023-07-15 11:21:58,735 (trainer:732) INFO: 53epoch:train:3901-4000batch: iter_time=1.312e-04, forward_time=0.147, loss_ctc=63.471, loss_att=47.210, acc=0.729, loss=52.089, backward_time=1.030, grad_norm=141.710, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.932e-05, train_time=2.810
+[gpub001:0/64] 2023-07-15 11:24:16,739 (trainer:732) INFO: 53epoch:train:4001-4100batch: iter_time=1.362e-04, forward_time=0.148, loss_ctc=71.783, loss_att=52.931, acc=0.721, loss=58.586, backward_time=1.028, grad_norm=117.704, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.931e-05, train_time=2.760
+[gpub001:0/64] 2023-07-15 11:25:58,082 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub001:0/64] 2023-07-15 11:26:16,209 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 11:26:19,575 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 11:26:19,575 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpub001:0/64] 2023-07-15 11:26:19,581 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 11:31:12,862 (trainer:732) INFO: 53epoch:train:4101-4200batch: iter_time=1.396, forward_time=0.174, loss_ctc=67.126, loss_att=50.278, acc=0.730, loss=55.332, backward_time=1.038, grad_norm=122.884, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.931e-05, train_time=8.322
+[gpub001:0/64] 2023-07-15 11:33:32,008 (trainer:732) INFO: 53epoch:train:4201-4300batch: iter_time=1.106e-04, forward_time=0.146, loss_ctc=72.073, loss_att=52.426, acc=0.717, loss=58.320, backward_time=1.031, grad_norm=122.383, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.931e-05, train_time=2.783
+[gpub001:0/64] 2023-07-15 11:35:47,563 (trainer:732) INFO: 53epoch:train:4301-4400batch: iter_time=9.438e-05, forward_time=0.144, loss_ctc=75.067, loss_att=52.917, acc=0.710, loss=59.562, backward_time=1.028, grad_norm=140.286, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.930e-05, train_time=2.711
+[gpub001:0/64] 2023-07-15 11:38:03,264 (trainer:732) INFO: 53epoch:train:4401-4500batch: iter_time=1.051e-04, forward_time=0.146, loss_ctc=63.278, loss_att=44.313, acc=0.735, loss=50.003, backward_time=1.027, grad_norm=155.846, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.930e-05, train_time=2.714
+[gpub001:0/64] 2023-07-15 11:40:20,170 (trainer:732) INFO: 53epoch:train:4501-4600batch: iter_time=1.002e-04, forward_time=0.145, loss_ctc=75.680, loss_att=58.305, acc=0.715, loss=63.517, backward_time=1.032, grad_norm=150.291, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.929e-05, train_time=2.738
+[gpub001:0/64] 2023-07-15 11:42:36,442 (trainer:732) INFO: 53epoch:train:4601-4700batch: iter_time=1.032e-04, forward_time=0.144, loss_ctc=65.197, loss_att=48.186, acc=0.727, loss=53.289, backward_time=1.027, grad_norm=138.175, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.929e-05, train_time=2.725
+[gpub001:0/64] 2023-07-15 11:44:54,859 (trainer:732) INFO: 53epoch:train:4701-4800batch: iter_time=9.797e-05, forward_time=0.145, loss_ctc=64.764, loss_att=45.735, acc=0.737, loss=51.444, backward_time=1.029, grad_norm=220.036, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.928e-05, train_time=2.768
+[gpub001:0/64] 2023-07-15 11:47:16,838 (trainer:732) INFO: 53epoch:train:4801-4900batch: iter_time=1.102e-04, forward_time=0.146, loss_ctc=70.186, loss_att=49.914, acc=0.714, loss=55.995, backward_time=1.034, grad_norm=126.685, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.928e-05, train_time=2.839
+[gpub001:0/64] 2023-07-15 11:49:35,518 (trainer:732) INFO: 53epoch:train:4901-5000batch: iter_time=1.101e-04, forward_time=0.144, loss_ctc=63.751, loss_att=47.279, acc=0.720, loss=52.221, backward_time=1.036, grad_norm=128.634, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.927e-05, train_time=2.773
+[gpub001:0/64] 2023-07-15 11:49:40,092 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub001:0/64] 2023-07-15 11:49:58,371 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 11:50:01,785 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 11:50:01,785 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpub001:0/64] 2023-07-15 11:50:01,791 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 11:56:06,405 (trainer:732) INFO: 53epoch:train:5001-5100batch: iter_time=1.688, forward_time=0.158, loss_ctc=76.559, loss_att=57.521, acc=0.703, loss=63.233, backward_time=1.040, grad_norm=125.506, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.927e-05, train_time=7.818
+[gpub001:0/64] 2023-07-15 11:58:22,014 (trainer:732) INFO: 53epoch:train:5101-5200batch: iter_time=1.050e-04, forward_time=0.145, loss_ctc=69.719, loss_att=48.716, acc=0.715, loss=55.017, backward_time=1.026, grad_norm=133.259, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.926e-05, train_time=2.712
+[gpub001:0/64] 2023-07-15 12:00:37,642 (trainer:732) INFO: 53epoch:train:5201-5300batch: iter_time=1.061e-04, forward_time=0.145, loss_ctc=66.930, loss_att=46.500, acc=0.734, loss=52.629, backward_time=1.027, grad_norm=125.915, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.926e-05, train_time=2.712
+[gpub001:0/64] 2023-07-15 12:02:53,478 (trainer:732) INFO: 53epoch:train:5301-5400batch: iter_time=1.105e-04, forward_time=0.146, loss_ctc=73.301, loss_att=55.694, acc=0.715, loss=60.976, backward_time=1.028, grad_norm=137.805, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.925e-05, train_time=2.717
+[gpub001:0/64] 2023-07-15 12:05:09,897 (trainer:732) INFO: 53epoch:train:5401-5500batch: iter_time=1.072e-04, forward_time=0.146, loss_ctc=67.056, loss_att=49.311, acc=0.723, loss=54.635, backward_time=1.028, grad_norm=120.485, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.925e-05, train_time=2.728
+[gpub001:0/64] 2023-07-15 12:07:26,024 (trainer:732) INFO: 53epoch:train:5501-5600batch: iter_time=1.192e-04, forward_time=0.147, loss_ctc=63.690, loss_att=46.252, acc=0.732, loss=51.484, backward_time=1.029, grad_norm=139.681, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.924e-05, train_time=2.722
+[gpub001:0/64] 2023-07-15 12:09:41,948 (trainer:732) INFO: 53epoch:train:5601-5700batch: iter_time=1.552e-04, forward_time=0.147, loss_ctc=70.104, loss_att=52.001, acc=0.712, loss=57.432, backward_time=1.028, grad_norm=107.690, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.924e-05, train_time=2.718
+[gpub001:0/64] 2023-07-15 12:11:58,251 (trainer:732) INFO: 53epoch:train:5701-5800batch: iter_time=1.338e-04, forward_time=0.148, loss_ctc=63.833, loss_att=47.985, acc=0.723, loss=52.739, backward_time=1.029, grad_norm=129.181, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.923e-05, train_time=2.726
+[gpub001:0/64] 2023-07-15 12:12:53,156 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub001:0/64] 2023-07-15 12:13:10,963 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 12:13:14,420 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 12:13:14,420 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub001:0/64] 2023-07-15 12:13:14,439 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 12:18:21,859 (trainer:732) INFO: 53epoch:train:5801-5900batch: iter_time=1.831, forward_time=0.146, loss_ctc=71.893, loss_att=49.578, acc=0.732, loss=56.273, backward_time=1.038, grad_norm=140.020, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.923e-05, train_time=7.672
+[gpub001:0/64] 2023-07-15 12:20:39,651 (trainer:732) INFO: 53epoch:train:5901-6000batch: iter_time=1.334e-04, forward_time=0.153, loss_ctc=77.718, loss_att=60.620, acc=0.708, loss=65.749, backward_time=1.031, grad_norm=115.613, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.922e-05, train_time=2.756
+[gpub001:0/64] 2023-07-15 12:22:55,460 (trainer:732) INFO: 53epoch:train:6001-6100batch: iter_time=1.329e-04, forward_time=0.147, loss_ctc=66.395, loss_att=47.215, acc=0.723, loss=52.969, backward_time=1.028, grad_norm=119.839, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.922e-05, train_time=2.716
+[gpub001:0/64] 2023-07-15 12:25:14,502 (trainer:732) INFO: 53epoch:train:6101-6200batch: iter_time=0.003, forward_time=0.146, loss_ctc=73.384, loss_att=54.398, acc=0.735, loss=60.094, backward_time=1.038, grad_norm=134.904, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.921e-05, train_time=2.781
+[gpub001:0/64] 2023-07-15 12:27:31,260 (trainer:732) INFO: 53epoch:train:6201-6300batch: iter_time=1.497e-04, forward_time=0.148, loss_ctc=68.480, loss_att=49.380, acc=0.721, loss=55.110, backward_time=1.029, grad_norm=133.346, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.921e-05, train_time=2.735
+[gpub001:0/64] 2023-07-15 12:29:47,555 (trainer:732) INFO: 53epoch:train:6301-6400batch: iter_time=9.751e-05, forward_time=0.147, loss_ctc=64.980, loss_att=48.650, acc=0.736, loss=53.549, backward_time=1.029, grad_norm=109.673, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.920e-05, train_time=2.726
+[gpub001:0/64] 2023-07-15 12:32:03,537 (trainer:732) INFO: 53epoch:train:6401-6500batch: iter_time=9.952e-05, forward_time=0.147, loss_ctc=64.249, loss_att=47.186, acc=0.732, loss=52.305, backward_time=1.029, grad_norm=131.597, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.920e-05, train_time=2.719
+[gpub001:0/64] 2023-07-15 12:34:25,293 (trainer:732) INFO: 53epoch:train:6501-6600batch: iter_time=6.416e-04, forward_time=0.158, loss_ctc=70.757, loss_att=53.176, acc=0.719, loss=58.450, backward_time=1.030, grad_norm=142.896, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=4.920e-05, train_time=2.835
+[gpub001:0/64] 2023-07-15 12:36:07,690 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub001:0/64] 2023-07-15 12:36:25,678 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 12:36:29,157 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 12:36:29,157 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub001:0/64] 2023-07-15 12:36:29,164 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 12:42:08,942 (trainer:732) INFO: 53epoch:train:6601-6700batch: iter_time=1.626, forward_time=0.195, loss_ctc=66.551, loss_att=48.083, acc=0.738, loss=53.624, backward_time=1.040, grad_norm=118.665, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.919e-05, train_time=9.271
+[gpub001:0/64] 2023-07-15 12:44:26,138 (trainer:732) INFO: 53epoch:train:6701-6800batch: iter_time=1.118e-04, forward_time=0.145, loss_ctc=72.189, loss_att=55.718, acc=0.714, loss=60.660, backward_time=1.030, grad_norm=123.601, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.919e-05, train_time=2.745
+[gpub001:0/64] 2023-07-15 12:46:42,970 (trainer:732) INFO: 53epoch:train:6801-6900batch: iter_time=1.227e-04, forward_time=0.147, loss_ctc=74.928, loss_att=53.819, acc=0.718, loss=60.152, backward_time=1.032, grad_norm=125.290, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.918e-05, train_time=2.736
+[gpub001:0/64] 2023-07-15 12:48:58,893 (trainer:732) INFO: 53epoch:train:6901-7000batch: iter_time=1.136e-04, forward_time=0.146, loss_ctc=63.121, loss_att=45.095, acc=0.738, loss=50.503, backward_time=1.027, grad_norm=109.171, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.918e-05, train_time=2.718
+[gpub001:0/64] 2023-07-15 12:51:14,725 (trainer:732) INFO: 53epoch:train:7001-7100batch: iter_time=1.081e-04, forward_time=0.145, loss_ctc=77.407, loss_att=58.128, acc=0.716, loss=63.911, backward_time=1.029, grad_norm=119.319, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.917e-05, train_time=2.716
+[gpub001:0/64] 2023-07-15 12:53:30,552 (trainer:732) INFO: 53epoch:train:7101-7200batch: iter_time=1.094e-04, forward_time=0.147, loss_ctc=64.425, loss_att=47.865, acc=0.737, loss=52.833, backward_time=1.028, grad_norm=136.329, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.917e-05, train_time=2.716
+[gpub001:0/64] 2023-07-15 12:55:46,313 (trainer:732) INFO: 53epoch:train:7201-7300batch: iter_time=1.091e-04, forward_time=0.147, loss_ctc=64.912, loss_att=45.342, acc=0.743, loss=51.213, backward_time=1.029, grad_norm=110.998, clip=100.000, loss_scale=2.499e+32, optim_step_time=0.182, optim0_lr0=4.916e-05, train_time=2.715
+[gpub001:0/64] 2023-07-15 12:58:02,139 (trainer:732) INFO: 53epoch:train:7301-7400batch: iter_time=1.144e-04, forward_time=0.147, loss_ctc=70.223, loss_att=50.001, acc=0.723, loss=56.068, backward_time=1.028, grad_norm=128.640, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=4.916e-05, train_time=2.716
+[gpub001:0/64] 2023-07-15 13:00:18,214 (trainer:732) INFO: 53epoch:train:7401-7500batch: iter_time=1.159e-04, forward_time=0.148, loss_ctc=62.811, loss_att=47.105, acc=0.732, loss=51.817, backward_time=1.030, grad_norm=123.147, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=4.915e-05, train_time=2.721
+[gpub001:0/64] 2023-07-15 13:00:22,641 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub001:0/64] 2023-07-15 13:00:40,613 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 13:00:44,325 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 13:00:44,325 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub001:0/64] 2023-07-15 13:00:44,332 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 13:06:53,659 (trainer:732) INFO: 53epoch:train:7501-7600batch: iter_time=1.682, forward_time=0.153, loss_ctc=71.689, loss_att=50.993, acc=0.731, loss=57.202, backward_time=1.056, grad_norm=110.911, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.915e-05, train_time=7.909
+[gpub001:0/64] 2023-07-15 13:09:10,347 (trainer:732) INFO: 53epoch:train:7601-7700batch: iter_time=1.160e-04, forward_time=0.146, loss_ctc=75.829, loss_att=57.328, acc=0.712, loss=62.878, backward_time=1.028, grad_norm=137.383, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.914e-05, train_time=2.734
+[gpub001:0/64] 2023-07-15 13:11:26,829 (trainer:732) INFO: 53epoch:train:7701-7800batch: iter_time=1.304e-04, forward_time=0.148, loss_ctc=65.217, loss_att=45.189, acc=0.736, loss=51.198, backward_time=1.029, grad_norm=122.205, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.914e-05, train_time=2.729
+[gpub001:0/64] 2023-07-15 13:13:42,554 (trainer:732) INFO: 53epoch:train:7801-7900batch: iter_time=1.287e-04, forward_time=0.146, loss_ctc=74.146, loss_att=54.219, acc=0.730, loss=60.197, backward_time=1.028, grad_norm=119.101, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.913e-05, train_time=2.714
+[gpub001:0/64] 2023-07-15 13:15:58,492 (trainer:732) INFO: 53epoch:train:7901-8000batch: iter_time=1.298e-04, forward_time=0.147, loss_ctc=64.820, loss_att=50.432, acc=0.736, loss=54.748, backward_time=1.029, grad_norm=136.726, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.913e-05, train_time=2.719
+[gpub001:0/64] 2023-07-15 13:18:14,196 (trainer:732) INFO: 53epoch:train:8001-8100batch: iter_time=1.249e-04, forward_time=0.147, loss_ctc=65.159, loss_att=46.648, acc=0.739, loss=52.201, backward_time=1.027, grad_norm=129.032, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.912e-05, train_time=2.714
+[gpub001:0/64] 2023-07-15 13:20:30,190 (trainer:732) INFO: 53epoch:train:8101-8200batch: iter_time=1.194e-04, forward_time=0.148, loss_ctc=67.450, loss_att=49.032, acc=0.722, loss=54.557, backward_time=1.029, grad_norm=149.096, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.912e-05, train_time=2.720
+[gpub001:0/64] 2023-07-15 13:22:47,766 (trainer:732) INFO: 53epoch:train:8201-8300batch: iter_time=1.230e-04, forward_time=0.147, loss_ctc=69.780, loss_att=53.305, acc=0.725, loss=58.248, backward_time=1.029, grad_norm=135.432, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.911e-05, train_time=2.751
+[gpub001:0/64] 2023-07-15 13:23:44,787 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub001:0/64] 2023-07-15 13:24:02,966 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 13:24:06,390 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 13:24:06,390 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpub001:0/64] 2023-07-15 13:24:06,396 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 13:30:26,163 (trainer:732) INFO: 53epoch:train:8301-8400batch: iter_time=2.133, forward_time=0.174, loss_ctc=68.152, loss_att=51.251, acc=0.719, loss=56.321, backward_time=1.042, grad_norm=127.178, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=4.911e-05, train_time=9.168
+[gpub001:0/64] 2023-07-15 13:32:43,842 (trainer:732) INFO: 53epoch:train:8401-8500batch: iter_time=1.123e-04, forward_time=0.147, loss_ctc=75.378, loss_att=53.951, acc=0.719, loss=60.379, backward_time=1.030, grad_norm=120.972, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.910e-05, train_time=2.753
+[gpub001:0/64] 2023-07-15 13:35:00,654 (trainer:732) INFO: 53epoch:train:8501-8600batch: iter_time=1.160e-04, forward_time=0.147, loss_ctc=63.746, loss_att=45.123, acc=0.736, loss=50.710, backward_time=1.027, grad_norm=118.053, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.910e-05, train_time=2.736
+[gpub001:0/64] 2023-07-15 13:37:16,881 (trainer:732) INFO: 53epoch:train:8601-8700batch: iter_time=1.145e-04, forward_time=0.145, loss_ctc=72.822, loss_att=55.363, acc=0.722, loss=60.601, backward_time=1.029, grad_norm=139.587, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.910e-05, train_time=2.724
+[gpub001:0/64] 2023-07-15 13:39:32,720 (trainer:732) INFO: 53epoch:train:8701-8800batch: iter_time=1.193e-04, forward_time=0.146, loss_ctc=62.997, loss_att=46.111, acc=0.740, loss=51.177, backward_time=1.028, grad_norm=115.444, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.909e-05, train_time=2.717
+[gpub001:0/64] 2023-07-15 13:41:48,463 (trainer:732) INFO: 53epoch:train:8801-8900batch: iter_time=1.169e-04, forward_time=0.147, loss_ctc=66.723, loss_att=46.736, acc=0.734, loss=52.732, backward_time=1.028, grad_norm=102.729, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.909e-05, train_time=2.715
+[gpub001:0/64] 2023-07-15 13:44:04,390 (trainer:732) INFO: 53epoch:train:8901-9000batch: iter_time=1.211e-04, forward_time=0.147, loss_ctc=65.966, loss_att=49.188, acc=0.724, loss=54.222, backward_time=1.029, grad_norm=110.737, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.908e-05, train_time=2.718
+[gpub001:0/64] 2023-07-15 13:46:20,504 (trainer:732) INFO: 53epoch:train:9001-9100batch: iter_time=1.170e-04, forward_time=0.147, loss_ctc=67.534, loss_att=51.762, acc=0.723, loss=56.493, backward_time=1.030, grad_norm=127.205, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.908e-05, train_time=2.722
+[gpub001:0/64] 2023-07-15 13:47:59,767 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub001:0/64] 2023-07-15 13:48:18,017 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 13:48:21,456 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 13:48:21,456 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpub001:0/64] 2023-07-15 13:48:21,462 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 13:52:48,491 (trainer:732) INFO: 53epoch:train:9101-9200batch: iter_time=1.496, forward_time=0.181, loss_ctc=70.548, loss_att=52.593, acc=0.730, loss=57.980, backward_time=1.037, grad_norm=137.642, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=4.907e-05, train_time=7.758
+[gpub001:0/64] 2023-07-15 13:55:08,582 (trainer:732) INFO: 53epoch:train:9201-9300batch: iter_time=1.120e-04, forward_time=0.145, loss_ctc=71.911, loss_att=54.358, acc=0.714, loss=59.624, backward_time=1.034, grad_norm=117.736, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.907e-05, train_time=2.803
+[gpub001:0/64] 2023-07-15 13:57:24,812 (trainer:732) INFO: 53epoch:train:9301-9400batch: iter_time=1.046e-04, forward_time=0.144, loss_ctc=75.774, loss_att=53.635, acc=0.711, loss=60.277, backward_time=1.026, grad_norm=116.262, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.906e-05, train_time=2.724
+[gpub001:0/64] 2023-07-15 13:59:47,479 (trainer:732) INFO: 53epoch:train:9401-9500batch: iter_time=1.084e-04, forward_time=0.145, loss_ctc=63.534, loss_att=44.227, acc=0.735, loss=50.019, backward_time=1.037, grad_norm=112.332, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.906e-05, train_time=2.853
+[gpub001:0/64] 2023-07-15 14:02:07,901 (trainer:732) INFO: 53epoch:train:9501-9600batch: iter_time=1.118e-04, forward_time=0.146, loss_ctc=75.989, loss_att=57.625, acc=0.716, loss=63.134, backward_time=1.035, grad_norm=128.818, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.905e-05, train_time=2.808
+[gpub001:0/64] 2023-07-15 14:04:30,560 (trainer:732) INFO: 53epoch:train:9601-9700batch: iter_time=1.133e-04, forward_time=0.145, loss_ctc=64.849, loss_att=47.746, acc=0.733, loss=52.877, backward_time=1.032, grad_norm=145.957, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.905e-05, train_time=2.853
+[gpub001:0/64] 2023-07-15 14:06:49,135 (trainer:732) INFO: 53epoch:train:9701-9800batch: iter_time=1.010e-04, forward_time=0.145, loss_ctc=64.302, loss_att=46.018, acc=0.737, loss=51.503, backward_time=1.028, grad_norm=115.494, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.904e-05, train_time=2.771
+[gpub001:0/64] 2023-07-15 14:09:06,265 (trainer:732) INFO: 53epoch:train:9801-9900batch: iter_time=1.128e-04, forward_time=0.146, loss_ctc=70.320, loss_att=50.257, acc=0.719, loss=56.276, backward_time=1.032, grad_norm=123.049, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.904e-05, train_time=2.742
+[gpub001:0/64] 2023-07-15 14:11:24,322 (trainer:732) INFO: 53epoch:train:9901-10000batch: iter_time=1.171e-04, forward_time=0.145, loss_ctc=62.892, loss_att=47.066, acc=0.722, loss=51.814, backward_time=1.028, grad_norm=133.856, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.903e-05, train_time=2.761
+[gpub001:0/64] 2023-07-15 14:25:24,779 (trainer:338) INFO: 53epoch results: [train] iter_time=0.246, forward_time=0.155, loss_ctc=69.208, loss_att=50.743, acc=0.723, loss=56.282, backward_time=1.033, grad_norm=130.642, clip=100.000, loss_scale=2.290e+32, optim_step_time=0.182, optim0_lr0=4.927e-05, train_time=3.453, time=4 hours, 48 minutes and 11.54 seconds, total_count=500000, gpu_max_cached_mem_GB=37.635, [valid] loss_ctc=43.074, cer_ctc=0.248, loss_att=38.316, acc=0.682, cer=0.391, wer=0.994, loss=39.744, time=7 minutes and 22.59 seconds, total_count=51106, gpu_max_cached_mem_GB=37.635, [att_plot] time=6 minutes and 14.16 seconds, total_count=0, gpu_max_cached_mem_GB=37.635
+[gpub001:0/64] 2023-07-15 14:25:40,654 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub001:0/64] 2023-07-15 14:25:40,674 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/48epoch.pth
+[gpub001:0/64] 2023-07-15 14:25:40,674 (trainer:272) INFO: 54/60epoch started. Estimated time to finish: 1 day, 10 hours and 52 minutes
+[gpub001:0/64] 2023-07-15 14:25:40,788 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub001:0/64] 2023-07-15 14:25:59,055 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 14:26:03,030 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 14:26:03,030 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub001:0/64] 2023-07-15 14:26:03,051 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 14:32:22,669 (trainer:732) INFO: 54epoch:train:1-100batch: iter_time=2.563, forward_time=0.175, loss_ctc=63.502, loss_att=48.578, acc=0.708, loss=53.055, backward_time=1.049, grad_norm=140.341, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.187, optim0_lr0=4.903e-05, train_time=8.037
+[gpub001:0/64] 2023-07-15 14:34:38,802 (trainer:732) INFO: 54epoch:train:101-200batch: iter_time=1.105e-04, forward_time=0.145, loss_ctc=62.392, loss_att=47.095, acc=0.700, loss=51.684, backward_time=1.029, grad_norm=120.836, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.902e-05, train_time=2.723
+[gpub001:0/64] 2023-07-15 14:36:54,626 (trainer:732) INFO: 54epoch:train:201-300batch: iter_time=9.869e-05, forward_time=0.145, loss_ctc=72.046, loss_att=53.141, acc=0.703, loss=58.812, backward_time=1.028, grad_norm=147.156, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.902e-05, train_time=2.716
+[gpub001:0/64] 2023-07-15 14:39:10,400 (trainer:732) INFO: 54epoch:train:301-400batch: iter_time=1.070e-04, forward_time=0.144, loss_ctc=68.492, loss_att=54.163, acc=0.695, loss=58.462, backward_time=1.028, grad_norm=132.331, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.902e-05, train_time=2.715
+[gpub001:0/64] 2023-07-15 14:41:40,557 (trainer:732) INFO: 54epoch:train:401-500batch: iter_time=1.053e-04, forward_time=0.144, loss_ctc=74.081, loss_att=54.631, acc=0.712, loss=60.466, backward_time=1.039, grad_norm=142.817, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.901e-05, train_time=3.003
+[gpub001:0/64] 2023-07-15 14:44:08,475 (trainer:732) INFO: 54epoch:train:501-600batch: iter_time=1.074e-04, forward_time=0.146, loss_ctc=75.789, loss_att=56.471, acc=0.714, loss=62.266, backward_time=1.041, grad_norm=129.097, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.901e-05, train_time=2.958
+[gpub001:0/64] 2023-07-15 14:46:25,669 (trainer:732) INFO: 54epoch:train:601-700batch: iter_time=1.051e-04, forward_time=0.145, loss_ctc=80.391, loss_att=63.078, acc=0.698, loss=68.272, backward_time=1.030, grad_norm=144.568, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.900e-05, train_time=2.744
+[gpub001:0/64] 2023-07-15 14:48:45,282 (trainer:732) INFO: 54epoch:train:701-800batch: iter_time=9.712e-05, forward_time=0.144, loss_ctc=76.898, loss_att=54.873, acc=0.711, loss=61.480, backward_time=1.034, grad_norm=129.027, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.900e-05, train_time=2.792
+[gpub001:0/64] 2023-07-15 14:49:40,812 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpub001:0/64] 2023-07-15 14:49:58,541 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 14:50:02,149 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 14:50:02,149 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub001:0/64] 2023-07-15 14:50:02,155 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 14:55:15,387 (trainer:732) INFO: 54epoch:train:801-900batch: iter_time=1.314, forward_time=0.146, loss_ctc=67.481, loss_att=53.521, acc=0.714, loss=57.709, backward_time=1.050, grad_norm=123.737, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.899e-05, train_time=7.802
+[gpub001:0/64] 2023-07-15 14:57:33,121 (trainer:732) INFO: 54epoch:train:901-1000batch: iter_time=1.229e-04, forward_time=0.147, loss_ctc=61.392, loss_att=45.351, acc=0.703, loss=50.163, backward_time=1.028, grad_norm=131.178, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.899e-05, train_time=2.754
+[gpub001:0/64] 2023-07-15 14:59:32,594 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub001:0/64] 2023-07-15 14:59:48,916 (trainer:732) INFO: 54epoch:train:1001-1100batch: iter_time=1.176e-04, forward_time=0.148, loss_ctc=65.249, loss_att=46.994, acc=0.724, loss=52.470, backward_time=1.028, grad_norm=120.404, clip=100.000, loss_scale=3.047e+32, optim_step_time=0.182, optim0_lr0=4.898e-05, train_time=2.716
+[gpub001:0/64] 2023-07-15 15:02:05,028 (trainer:732) INFO: 54epoch:train:1101-1200batch: iter_time=1.215e-04, forward_time=0.147, loss_ctc=68.386, loss_att=53.369, acc=0.710, loss=57.874, backward_time=1.030, grad_norm=121.179, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.898e-05, train_time=2.722
+[gpub001:0/64] 2023-07-15 15:04:21,637 (trainer:732) INFO: 54epoch:train:1201-1300batch: iter_time=1.120e-04, forward_time=0.148, loss_ctc=72.716, loss_att=55.502, acc=0.716, loss=60.666, backward_time=1.033, grad_norm=135.679, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.897e-05, train_time=2.732
+[gpub001:0/64] 2023-07-15 15:06:37,386 (trainer:732) INFO: 54epoch:train:1301-1400batch: iter_time=1.122e-04, forward_time=0.147, loss_ctc=70.008, loss_att=51.346, acc=0.719, loss=56.945, backward_time=1.029, grad_norm=128.324, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.897e-05, train_time=2.715
+[gpub001:0/64] 2023-07-15 15:08:54,748 (trainer:732) INFO: 54epoch:train:1401-1500batch: iter_time=1.246e-04, forward_time=0.149, loss_ctc=83.968, loss_att=69.519, acc=0.699, loss=73.854, backward_time=1.032, grad_norm=138.800, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.896e-05, train_time=2.747
+[gpub001:0/64] 2023-07-15 15:11:10,946 (trainer:732) INFO: 54epoch:train:1501-1600batch: iter_time=1.227e-04, forward_time=0.148, loss_ctc=72.417, loss_att=52.717, acc=0.726, loss=58.627, backward_time=1.031, grad_norm=125.644, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.896e-05, train_time=2.724
+[gpub001:0/64] 2023-07-15 15:12:42,398 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub001:0/64] 2023-07-15 15:13:00,574 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 15:13:04,259 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 15:13:04,259 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub001:0/64] 2023-07-15 15:13:04,265 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 15:17:27,833 (trainer:732) INFO: 54epoch:train:1601-1700batch: iter_time=1.373, forward_time=0.166, loss_ctc=71.162, loss_att=54.576, acc=0.714, loss=59.552, backward_time=1.038, grad_norm=130.731, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.186, optim0_lr0=4.895e-05, train_time=7.537 +[gpub001:0/64] 2023-07-15 15:19:44,876 (trainer:732) INFO: 54epoch:train:1701-1800batch: iter_time=1.002e-04, forward_time=0.146, loss_ctc=57.674, loss_att=42.891, acc=0.710, loss=47.326, backward_time=1.033, grad_norm=146.138, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.895e-05, train_time=2.741 +[gpub001:0/64] 2023-07-15 15:22:00,770 (trainer:732) INFO: 54epoch:train:1801-1900batch: iter_time=1.005e-04, forward_time=0.145, loss_ctc=69.982, loss_att=52.765, acc=0.709, loss=57.930, backward_time=1.029, grad_norm=110.570, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.894e-05, train_time=2.718 +[gpub001:0/64] 2023-07-15 15:24:16,473 (trainer:732) INFO: 54epoch:train:1901-2000batch: iter_time=1.014e-04, forward_time=0.145, loss_ctc=68.795, loss_att=50.053, acc=0.721, loss=55.676, backward_time=1.027, grad_norm=128.134, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.894e-05, train_time=2.714 +[gpub001:0/64] 2023-07-15 15:26:32,120 (trainer:732) INFO: 54epoch:train:2001-2100batch: iter_time=1.016e-04, forward_time=0.144, loss_ctc=71.032, loss_att=54.634, acc=0.702, loss=59.553, backward_time=1.026, grad_norm=146.644, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.894e-05, train_time=2.713 +[gpub001:0/64] 2023-07-15 15:28:47,771 (trainer:732) INFO: 54epoch:train:2101-2200batch: iter_time=1.116e-04, forward_time=0.144, loss_ctc=69.011, loss_att=51.902, acc=0.714, loss=57.035, backward_time=1.027, grad_norm=146.707, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.893e-05, train_time=2.713 +[gpub001:0/64] 2023-07-15 15:31:09,057 (trainer:732) INFO: 54epoch:train:2201-2300batch: iter_time=1.051e-04, forward_time=0.145, loss_ctc=77.368, loss_att=56.839, acc=0.720, loss=62.997, backward_time=1.037, grad_norm=146.981, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.893e-05, train_time=2.826 +[gpub001:0/64] 2023-07-15 15:33:27,573 (trainer:732) INFO: 54epoch:train:2301-2400batch: iter_time=1.123e-04, forward_time=0.145, loss_ctc=72.933, loss_att=60.662, acc=0.700, loss=64.343, backward_time=1.031, grad_norm=140.913, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, 
optim0_lr0=4.892e-05, train_time=2.770 +[gpub001:0/64] 2023-07-15 15:36:01,415 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub001:0/64] 2023-07-15 15:36:19,553 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/64] 2023-07-15 15:36:23,193 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub001:0/64] 2023-07-15 15:36:23,194 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub001:0/64] 2023-07-15 15:36:23,200 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub001:0/64] 2023-07-15 15:39:55,199 (trainer:732) INFO: 54epoch:train:2401-2500batch: iter_time=2.445, forward_time=0.145, loss_ctc=74.369, loss_att=54.412, acc=0.716, loss=60.399, backward_time=1.038, grad_norm=132.718, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.892e-05, train_time=7.752 +[gpub001:0/64] 2023-07-15 15:42:12,710 (trainer:732) INFO: 54epoch:train:2501-2600batch: iter_time=1.524e-04, forward_time=0.147, loss_ctc=59.869, loss_att=46.039, acc=0.696, loss=50.188, backward_time=1.033, grad_norm=131.162, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.891e-05, train_time=2.750 +[gpub001:0/64] 2023-07-15 15:44:29,535 (trainer:732) INFO: 54epoch:train:2601-2700batch: iter_time=1.293e-04, forward_time=0.147, loss_ctc=65.703, loss_att=49.009, acc=0.712, loss=54.017, backward_time=1.025, grad_norm=128.350, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.891e-05, train_time=2.736 +[gpub001:0/64] 2023-07-15 15:46:45,197 (trainer:732) INFO: 54epoch:train:2701-2800batch: iter_time=1.495e-04, forward_time=0.147, loss_ctc=68.702, loss_att=53.287, acc=0.707, loss=57.911, backward_time=1.027, grad_norm=154.384, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.890e-05, train_time=2.713 +[gpub001:0/64] 2023-07-15 15:49:01,013 (trainer:732) INFO: 54epoch:train:2801-2900batch: iter_time=1.303e-04, forward_time=0.147, loss_ctc=73.798, loss_att=55.447, acc=0.706, loss=60.952, backward_time=1.029, grad_norm=127.920, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.890e-05, train_time=2.716 +[gpub001:0/64] 2023-07-15 15:49:11,705 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. 
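The WARNING just above is the trainer's mixed-precision guard: when the unscaled gradient norm comes back non-finite, the optimizer step is skipped and the AMP loss scaler reacts. The loss_scale column behaves accordingly, dropping from ~1.6e+32 to ~8.1e+31 (roughly halved) in the entries that follow and growing again later. A minimal sketch of that skip-on-NaN pattern, modeled on torch.cuda.amp.GradScaler semantics rather than ESPnet's exact trainer code (model, batch, and train_step are placeholder names):

    import torch

    def train_step(model, batch, optimizer, scaler, max_norm=100.0):
        # Hedged sketch, not ESPnet's implementation.
        optimizer.zero_grad()
        with torch.autocast(device_type="cuda", dtype=torch.float16):
            loss = model(**batch)          # combined CTC/attention loss
        scaler.scale(loss).backward()      # backward on the scaled loss
        scaler.unscale_(optimizer)         # gradients back in real units
        grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
        if not torch.isfinite(grad_norm):
            # "The grad norm is nan. Skipping updating the model."
            pass                           # no optimizer step this batch
        else:
            scaler.step(optimizer)         # normal update
        scaler.update()  # halves loss_scale after a bad step, regrows it later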
+[gpub001:0/64] 2023-07-15 15:51:16,520 (trainer:732) INFO: 54epoch:train:2901-3000batch: iter_time=1.349e-04, forward_time=0.146, loss_ctc=67.917, loss_att=49.080, acc=0.724, loss=54.731, backward_time=1.029, grad_norm=136.985, clip=100.000, loss_scale=8.610e+31, optim_step_time=0.182, optim0_lr0=4.889e-05, train_time=2.710
+[gpub001:0/64] 2023-07-15 15:53:32,664 (trainer:732) INFO: 54epoch:train:3001-3100batch: iter_time=1.471e-04, forward_time=0.148, loss_ctc=81.457, loss_att=61.670, acc=0.702, loss=67.606, backward_time=1.030, grad_norm=170.754, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.889e-05, train_time=2.723
+[gpub001:0/64] 2023-07-15 15:55:52,060 (trainer:732) INFO: 54epoch:train:3101-3200batch: iter_time=1.276e-04, forward_time=0.147, loss_ctc=74.030, loss_att=59.359, acc=0.705, loss=63.760, backward_time=1.030, grad_norm=138.690, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.888e-05, train_time=2.788
+[gpub001:0/64] 2023-07-15 15:58:14,921 (trainer:732) INFO: 54epoch:train:3201-3300batch: iter_time=1.442e-04, forward_time=0.146, loss_ctc=71.516, loss_att=52.026, acc=0.713, loss=57.873, backward_time=1.035, grad_norm=134.585, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.888e-05, train_time=2.857
+[gpub001:0/64] 2023-07-15 15:59:06,473 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub001:0/64] 2023-07-15 15:59:24,546 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 15:59:28,036 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 15:59:28,036 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpub001:0/64] 2023-07-15 15:59:28,042 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 16:04:12,014 (trainer:732) INFO: 54epoch:train:3301-3400batch: iter_time=1.395, forward_time=0.180, loss_ctc=67.158, loss_att=49.393, acc=0.713, loss=54.722, backward_time=1.042, grad_norm=131.945, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.887e-05, train_time=7.141
+[gpub001:0/64] 2023-07-15 16:06:28,728 (trainer:732) INFO: 54epoch:train:3401-3500batch: iter_time=9.250e-05, forward_time=0.146, loss_ctc=65.344, loss_att=46.346, acc=0.721, loss=52.046, backward_time=1.028, grad_norm=126.382, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.887e-05, train_time=2.735
+[gpub001:0/64] 2023-07-15 16:08:45,609 (trainer:732) INFO: 54epoch:train:3501-3600batch: iter_time=9.346e-05, forward_time=0.147, loss_ctc=69.324, loss_att=54.156, acc=0.716, loss=58.706, backward_time=1.030, grad_norm=127.329, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.887e-05, train_time=2.737
+[gpub001:0/64] 2023-07-15 16:11:02,343 (trainer:732) INFO: 54epoch:train:3601-3700batch: iter_time=9.335e-05, forward_time=0.146, loss_ctc=68.249, loss_att=52.531, acc=0.713, loss=57.246, backward_time=1.034, grad_norm=131.595, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.886e-05, train_time=2.734
+[gpub001:0/64] 2023-07-15 16:13:18,306 (trainer:732) INFO: 54epoch:train:3701-3800batch: iter_time=9.645e-05, forward_time=0.146, loss_ctc=67.231, loss_att=51.222, acc=0.716, loss=56.025, backward_time=1.030, grad_norm=145.759, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.886e-05, train_time=2.719
+[gpub001:0/64] 2023-07-15 16:15:35,928 (trainer:732) INFO: 54epoch:train:3801-3900batch: iter_time=9.782e-05, forward_time=0.146, loss_ctc=76.405, loss_att=57.298, acc=0.718, loss=63.030, backward_time=1.030, grad_norm=118.733, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.885e-05, train_time=2.752
+[gpub001:0/64] 2023-07-15 16:17:53,102 (trainer:732) INFO: 54epoch:train:3901-4000batch: iter_time=9.738e-05, forward_time=0.146, loss_ctc=77.861, loss_att=59.602, acc=0.715, loss=65.080, backward_time=1.032, grad_norm=143.778, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.885e-05, train_time=2.743
+[gpub001:0/64] 2023-07-15 16:20:12,772 (trainer:732) INFO: 54epoch:train:4001-4100batch: iter_time=9.171e-05, forward_time=0.146, loss_ctc=71.078, loss_att=52.212, acc=0.724, loss=57.872, backward_time=1.031, grad_norm=122.391, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.884e-05, train_time=2.793
+[gpub001:0/64] 2023-07-15 16:21:56,724 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub001:0/64] 2023-07-15 16:22:14,655 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 16:22:18,028 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 16:22:18,028 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpub001:0/64] 2023-07-15 16:22:18,035 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 16:29:57,478 (trainer:732) INFO: 54epoch:train:4101-4200batch: iter_time=4.421, forward_time=0.186, loss_ctc=72.658, loss_att=54.839, acc=0.713, loss=60.185, backward_time=1.041, grad_norm=113.223, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.185, optim0_lr0=4.884e-05, train_time=11.694
+[gpub001:0/64] 2023-07-15 16:32:14,169 (trainer:732) INFO: 54epoch:train:4201-4300batch: iter_time=1.315e-04, forward_time=0.149, loss_ctc=62.009, loss_att=45.248, acc=0.711, loss=50.276, backward_time=1.028, grad_norm=132.170, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.883e-05, train_time=2.734
+[gpub001:0/64] 2023-07-15 16:34:31,503 (trainer:732) INFO: 54epoch:train:4301-4400batch: iter_time=1.108e-04, forward_time=0.146, loss_ctc=68.740, loss_att=51.311, acc=0.716, loss=56.540, backward_time=1.028, grad_norm=148.146, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=4.883e-05, train_time=2.746
+[gpub001:0/64] 2023-07-15 16:36:47,096 (trainer:732) INFO: 54epoch:train:4401-4500batch: iter_time=1.169e-04, forward_time=0.145, loss_ctc=64.509, loss_att=49.340, acc=0.715, loss=53.891, backward_time=1.026, grad_norm=142.094, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.882e-05, train_time=2.712
+[gpub001:0/64] 2023-07-15 16:39:14,472 (trainer:732) INFO: 54epoch:train:4501-4600batch: iter_time=5.487e-04, forward_time=0.188, loss_ctc=71.996, loss_att=52.884, acc=0.721, loss=58.617, backward_time=1.057, grad_norm=114.514, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.191, optim0_lr0=4.882e-05, train_time=2.945
+[gpub001:0/64] 2023-07-15 16:41:40,016 (trainer:732) INFO: 54epoch:train:4601-4700batch: iter_time=1.099e-04, forward_time=0.216, loss_ctc=69.267, loss_att=53.635, acc=0.719, loss=58.325, backward_time=1.040, grad_norm=162.003, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.186, optim0_lr0=4.881e-05, train_time=2.913
+[gpub001:0/64] 2023-07-15 16:43:56,617 (trainer:732) INFO: 54epoch:train:4701-4800batch: iter_time=1.111e-04, forward_time=0.147, loss_ctc=79.072, loss_att=61.795, acc=0.708, loss=66.978, backward_time=1.032, grad_norm=130.009, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.881e-05, train_time=2.732
+[gpub001:0/64] 2023-07-15 16:46:13,780 (trainer:732) INFO: 54epoch:train:4801-4900batch: iter_time=1.238e-04, forward_time=0.146, loss_ctc=75.323, loss_att=51.827, acc=0.719, loss=58.876, backward_time=1.030, grad_norm=137.087, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.880e-05, train_time=2.743
+[gpub001:0/64] 2023-07-15 16:48:31,776 (trainer:732) INFO: 54epoch:train:4901-5000batch: iter_time=1.177e-04, forward_time=0.146, loss_ctc=74.927, loss_att=57.150, acc=0.708, loss=62.483, backward_time=1.033, grad_norm=132.363, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.880e-05, train_time=2.760
+[gpub001:0/64] 2023-07-15 16:48:53,054 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
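The recurring "Building Nth iter-factory..." lines mark shard boundaries: the training set was pre-split into 12 shards (splits12), and each shard gets its own dataset, batch sampler, and loader. Building one is expensive, which is why the first 100-batch window after each build logs a large iter_time (e.g. 1.373, 2.445, 4.421 above) and a correspondingly inflated train_time. A hypothetical sketch of that per-shard loop (placeholder names, not ESPnet's actual multiple_iter_factory code):

    def epoch_batches(build_iter_factory, shard_ids):
        # shard_ids follow the epoch's shuffled order, e.g. [2, 8, 9, 10, ...]
        for shard in shard_ids:
            loader = build_iter_factory(shard)  # "Building Nth iter-factory..."
            yield from loader                   # first batches pay the build cost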
+[gpub001:0/64] 2023-07-15 16:49:11,204 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 16:49:14,657 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 16:49:14,657 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub001:0/64] 2023-07-15 16:49:14,723 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 16:55:54,724 (trainer:732) INFO: 54epoch:train:5001-5100batch: iter_time=2.962, forward_time=0.147, loss_ctc=58.843, loss_att=46.145, acc=0.708, loss=49.955, backward_time=1.045, grad_norm=115.373, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.880e-05, train_time=8.859
+[gpub001:0/64] 2023-07-15 16:58:11,635 (trainer:732) INFO: 54epoch:train:5101-5200batch: iter_time=1.173e-04, forward_time=0.145, loss_ctc=66.317, loss_att=46.348, acc=0.724, loss=52.339, backward_time=1.031, grad_norm=131.819, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.879e-05, train_time=2.738
+[gpub001:0/64] 2023-07-15 17:00:27,591 (trainer:732) INFO: 54epoch:train:5201-5300batch: iter_time=1.180e-04, forward_time=0.146, loss_ctc=67.278, loss_att=51.549, acc=0.714, loss=56.268, backward_time=1.029, grad_norm=135.720, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.879e-05, train_time=2.719
+[gpub001:0/64] 2023-07-15 17:02:43,684 (trainer:732) INFO: 54epoch:train:5301-5400batch: iter_time=1.163e-04, forward_time=0.147, loss_ctc=73.708, loss_att=57.069, acc=0.716, loss=62.061, backward_time=1.030, grad_norm=153.196, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.878e-05, train_time=2.722
+[gpub001:0/64] 2023-07-15 17:05:10,383 (trainer:732) INFO: 54epoch:train:5401-5500batch: iter_time=1.142e-04, forward_time=0.146, loss_ctc=68.066, loss_att=48.978, acc=0.724, loss=54.704, backward_time=1.038, grad_norm=142.518, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.878e-05, train_time=2.934
+[gpub001:0/64] 2023-07-15 17:07:31,542 (trainer:732) INFO: 54epoch:train:5501-5600batch: iter_time=1.149e-04, forward_time=0.148, loss_ctc=79.121, loss_att=60.754, acc=0.714, loss=66.264, backward_time=1.037, grad_norm=156.564, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.877e-05, train_time=2.823
+[gpub001:0/64] 2023-07-15 17:09:48,129 (trainer:732) INFO: 54epoch:train:5601-5700batch: iter_time=1.147e-04, forward_time=0.148, loss_ctc=71.339, loss_att=55.929, acc=0.723, loss=60.552, backward_time=1.033, grad_norm=154.635, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.877e-05, train_time=2.732
+[gpub001:0/64] 2023-07-15 17:12:04,010 (trainer:732) INFO: 54epoch:train:5701-5800batch: iter_time=1.086e-04, forward_time=0.147, loss_ctc=72.126, loss_att=54.273, acc=0.721, loss=59.629, backward_time=1.028, grad_norm=132.905, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.876e-05, train_time=2.717
+[gpub001:0/64] 2023-07-15 17:12:54,371 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub001:0/64] 2023-07-15 17:13:12,535 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 17:13:15,973 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 17:13:15,973 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpub001:0/64] 2023-07-15 17:13:15,979 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 17:18:33,665 (trainer:732) INFO: 54epoch:train:5801-5900batch: iter_time=1.397, forward_time=0.233, loss_ctc=70.879, loss_att=52.656, acc=0.723, loss=58.123, backward_time=1.073, grad_norm=140.446, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.184, optim0_lr0=4.876e-05, train_time=7.793
+[gpub001:0/64] 2023-07-15 17:21:02,892 (trainer:732) INFO: 54epoch:train:5901-6000batch: iter_time=1.398e-04, forward_time=0.165, loss_ctc=60.858, loss_att=44.904, acc=0.700, loss=49.690, backward_time=1.040, grad_norm=132.788, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.185, optim0_lr0=4.875e-05, train_time=2.984
+[gpub001:0/64] 2023-07-15 17:23:19,553 (trainer:732) INFO: 54epoch:train:6001-6100batch: iter_time=1.350e-04, forward_time=0.145, loss_ctc=64.750, loss_att=48.839, acc=0.720, loss=53.613, backward_time=1.028, grad_norm=136.365, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.875e-05, train_time=2.733
+[gpub001:0/64] 2023-07-15 17:25:35,434 (trainer:732) INFO: 54epoch:train:6101-6200batch: iter_time=1.440e-04, forward_time=0.147, loss_ctc=66.947, loss_att=50.736, acc=0.710, loss=55.599, backward_time=1.026, grad_norm=127.831, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=4.874e-05, train_time=2.717
+[gpub001:0/64] 2023-07-15 17:27:59,502 (trainer:732) INFO: 54epoch:train:6201-6300batch: iter_time=1.252e-04, forward_time=0.147, loss_ctc=71.889, loss_att=54.562, acc=0.718, loss=59.761, backward_time=1.036, grad_norm=116.556, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.874e-05, train_time=2.881
+[gpub001:0/64] 2023-07-15 17:30:18,402 (trainer:732) INFO: 54epoch:train:6301-6400batch: iter_time=1.250e-04, forward_time=0.147, loss_ctc=66.848, loss_att=48.903, acc=0.725, loss=54.286, backward_time=1.032, grad_norm=118.817, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.874e-05, train_time=2.778
+[gpub001:0/64] 2023-07-15 17:32:34,639 (trainer:732) INFO: 54epoch:train:6401-6500batch: iter_time=1.384e-04, forward_time=0.146, loss_ctc=79.035, loss_att=65.024, acc=0.701, loss=69.227, backward_time=1.030, grad_norm=146.629, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.873e-05, train_time=2.724
+[gpub001:0/64] 2023-07-15 17:34:53,111 (trainer:732) INFO: 54epoch:train:6501-6600batch: iter_time=1.287e-04, forward_time=0.145, loss_ctc=73.072, loss_att=54.719, acc=0.716, loss=60.225, backward_time=1.030, grad_norm=141.338, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.873e-05, train_time=2.769
+[gpub001:0/64] 2023-07-15 17:36:40,428 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub001:0/64] 2023-07-15 17:36:58,440 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 17:37:01,910 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 17:37:01,911 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub001:0/64] 2023-07-15 17:37:01,917 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 17:41:31,451 (trainer:732) INFO: 54epoch:train:6601-6700batch: iter_time=1.482, forward_time=0.207, loss_ctc=70.480, loss_att=52.884, acc=0.716, loss=58.163, backward_time=1.044, grad_norm=119.526, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.186, optim0_lr0=4.872e-05, train_time=7.967
+[gpub001:0/64] 2023-07-15 17:43:48,493 (trainer:732) INFO: 54epoch:train:6701-6800batch: iter_time=1.090e-04, forward_time=0.148, loss_ctc=56.961, loss_att=42.537, acc=0.721, loss=46.864, backward_time=1.031, grad_norm=127.255, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.872e-05, train_time=2.741
+[gpub001:0/64] 2023-07-15 17:46:04,798 (trainer:732) INFO: 54epoch:train:6801-6900batch: iter_time=1.087e-04, forward_time=0.147, loss_ctc=66.029, loss_att=48.816, acc=0.717, loss=53.980, backward_time=1.029, grad_norm=110.226, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=4.871e-05, train_time=2.726
+[gpub001:0/64] 2023-07-15 17:48:20,547 (trainer:732) INFO: 54epoch:train:6901-7000batch: iter_time=1.089e-04, forward_time=0.146, loss_ctc=68.985, loss_att=52.724, acc=0.718, loss=57.602, backward_time=1.026, grad_norm=155.820, clip=100.000, loss_scale=1.558e+32, optim_step_time=0.182, optim0_lr0=4.871e-05, train_time=2.715
+[gpub001:0/64] 2023-07-15 17:50:36,889 (trainer:732) INFO: 54epoch:train:7001-7100batch: iter_time=1.080e-04, forward_time=0.147, loss_ctc=69.839, loss_att=54.243, acc=0.714, loss=58.922, backward_time=1.029, grad_norm=144.642, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.870e-05, train_time=2.727
+[gpub001:0/64] 2023-07-15 17:52:52,346 (trainer:732) INFO: 54epoch:train:7101-7200batch: iter_time=1.146e-04, forward_time=0.145, loss_ctc=67.013, loss_att=51.423, acc=0.715, loss=56.100, backward_time=1.025, grad_norm=131.762, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.870e-05, train_time=2.709
+[gpub001:0/64] 2023-07-15 17:55:09,258 (trainer:732) INFO: 54epoch:train:7201-7300batch: iter_time=1.155e-04, forward_time=0.148, loss_ctc=73.813, loss_att=55.438, acc=0.729, loss=60.950, backward_time=1.031, grad_norm=134.728, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.869e-05, train_time=2.738
+[gpub001:0/64] 2023-07-15 17:57:25,341 (trainer:732) INFO: 54epoch:train:7301-7400batch: iter_time=1.162e-04, forward_time=0.147, loss_ctc=77.874, loss_att=60.697, acc=0.708, loss=65.850, backward_time=1.030, grad_norm=138.884, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.869e-05, train_time=2.721
+[gpub001:0/64] 2023-07-15 17:59:41,257 (trainer:732) INFO: 54epoch:train:7401-7500batch: iter_time=1.076e-04, forward_time=0.147, loss_ctc=72.240, loss_att=53.009, acc=0.727, loss=58.778, backward_time=1.029, grad_norm=151.515, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.868e-05, train_time=2.718
+[gpub001:0/64] 2023-07-15 17:59:45,954 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub001:0/64] 2023-07-15 18:00:04,194 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 18:00:07,635 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 18:00:07,635 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub001:0/64] 2023-07-15 18:00:07,736 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 18:05:23,755 (trainer:732) INFO: 54epoch:train:7501-7600batch: iter_time=1.578, forward_time=0.148, loss_ctc=63.070, loss_att=46.075, acc=0.730, loss=51.173, backward_time=1.046, grad_norm=104.077, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.868e-05, train_time=6.850
+[gpub001:0/64] 2023-07-15 18:07:39,960 (trainer:732) INFO: 54epoch:train:7601-7700batch: iter_time=1.174e-04, forward_time=0.147, loss_ctc=61.589, loss_att=43.882, acc=0.723, loss=49.194, backward_time=1.027, grad_norm=131.469, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.868e-05, train_time=2.724
+[gpub001:0/64] 2023-07-15 18:09:56,626 (trainer:732) INFO: 54epoch:train:7701-7800batch: iter_time=1.127e-04, forward_time=0.149, loss_ctc=67.589, loss_att=49.630, acc=0.721, loss=55.018, backward_time=1.029, grad_norm=119.043, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.867e-05, train_time=2.733
+[gpub001:0/64] 2023-07-15 18:12:12,863 (trainer:732) INFO: 54epoch:train:7801-7900batch: iter_time=1.177e-04, forward_time=0.147, loss_ctc=68.700, loss_att=54.220, acc=0.708, loss=58.564, backward_time=1.028, grad_norm=137.783, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.867e-05, train_time=2.725
+[gpub001:0/64] 2023-07-15 18:14:41,639 (trainer:732) INFO: 54epoch:train:7901-8000batch: iter_time=5.809e-04, forward_time=0.239, loss_ctc=69.066, loss_att=52.174, acc=0.726, loss=57.241, backward_time=1.045, grad_norm=143.195, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.187, optim0_lr0=4.866e-05, train_time=2.975
+[gpub001:0/64] 2023-07-15 18:17:01,347 (trainer:732) INFO: 54epoch:train:8001-8100batch: iter_time=1.186e-04, forward_time=0.170, loss_ctc=74.493, loss_att=53.218, acc=0.731, loss=59.601, backward_time=1.032, grad_norm=137.223, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.866e-05, train_time=2.794
+[gpub001:0/64] 2023-07-15 18:19:39,609 (trainer:732) INFO: 54epoch:train:8101-8200batch: iter_time=1.190e-04, forward_time=0.154, loss_ctc=76.741, loss_att=61.285, acc=0.712, loss=65.922, backward_time=1.056, grad_norm=126.126, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.865e-05, train_time=3.165
+[gpub001:0/64] 2023-07-15 18:21:56,191 (trainer:732) INFO: 54epoch:train:8201-8300batch: iter_time=1.161e-04, forward_time=0.148, loss_ctc=74.008, loss_att=52.223, acc=0.727, loss=58.758, backward_time=1.032, grad_norm=115.839, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.865e-05, train_time=2.731
+[gpub001:0/64] 2023-07-15 18:23:02,635 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub001:0/64] 2023-07-15 18:23:20,841 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 18:23:24,297 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 18:23:24,297 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub001:0/64] 2023-07-15 18:23:24,303 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 18:29:23,956 (trainer:732) INFO: 54epoch:train:8301-8400batch: iter_time=2.345, forward_time=0.153, loss_ctc=64.437, loss_att=50.931, acc=0.730, loss=54.983, backward_time=1.080, grad_norm=113.050, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.864e-05, train_time=8.955
+[gpub001:0/64] 2023-07-15 18:31:53,086 (trainer:732) INFO: 54epoch:train:8401-8500batch: iter_time=1.081e-04, forward_time=0.147, loss_ctc=60.709, loss_att=44.071, acc=0.715, loss=49.063, backward_time=1.058, grad_norm=119.273, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.864e-05, train_time=2.982
+[gpub001:0/64] 2023-07-15 18:34:12,345 (trainer:732) INFO: 54epoch:train:8501-8600batch: iter_time=1.015e-04, forward_time=0.146, loss_ctc=63.847, loss_att=44.978, acc=0.734, loss=50.639, backward_time=1.044, grad_norm=106.758, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.863e-05, train_time=2.785
+[gpub001:0/64] 2023-07-15 18:36:33,395 (trainer:732) INFO: 54epoch:train:8601-8700batch: iter_time=1.047e-04, forward_time=0.145, loss_ctc=67.021, loss_att=51.807, acc=0.717, loss=56.371, backward_time=1.036, grad_norm=147.733, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.863e-05, train_time=2.821
+[gpub001:0/64] 2023-07-15 18:38:52,658 (trainer:732) INFO: 54epoch:train:8701-8800batch: iter_time=9.446e-05, forward_time=0.146, loss_ctc=71.996, loss_att=54.637, acc=0.721, loss=59.845, backward_time=1.031, grad_norm=134.907, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.862e-05, train_time=2.785
+[gpub001:0/64] 2023-07-15 18:41:08,460 (trainer:732) INFO: 54epoch:train:8801-8900batch: iter_time=9.579e-05, forward_time=0.147, loss_ctc=67.011, loss_att=49.373, acc=0.731, loss=54.664, backward_time=1.028, grad_norm=118.447, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.862e-05, train_time=2.716
+[gpub001:0/64] 2023-07-15 18:43:31,785 (trainer:732) INFO: 54epoch:train:8901-9000batch: iter_time=9.115e-05, forward_time=0.147, loss_ctc=80.349, loss_att=63.204, acc=0.712, loss=68.347, backward_time=1.040, grad_norm=116.323, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.862e-05, train_time=2.866
+[gpub001:0/64] 2023-07-15 18:45:47,919 (trainer:732) INFO: 54epoch:train:9001-9100batch: iter_time=1.010e-04, forward_time=0.146, loss_ctc=72.525, loss_att=52.621, acc=0.728, loss=58.592, backward_time=1.031, grad_norm=131.623, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.861e-05, train_time=2.722
+[gpub001:0/64] 2023-07-15 18:47:20,788 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub001:0/64] 2023-07-15 18:47:38,830 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 18:47:42,516 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 18:47:42,516 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpub001:0/64] 2023-07-15 18:47:42,522 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 18:51:38,331 (trainer:732) INFO: 54epoch:train:9101-9200batch: iter_time=1.531, forward_time=0.173, loss_ctc=69.490, loss_att=52.765, acc=0.723, loss=57.782, backward_time=1.037, grad_norm=124.708, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.861e-05, train_time=7.008
+[gpub001:0/64] 2023-07-15 18:53:54,968 (trainer:732) INFO: 54epoch:train:9201-9300batch: iter_time=1.110e-04, forward_time=0.147, loss_ctc=56.277, loss_att=41.943, acc=0.713, loss=46.244, backward_time=1.031, grad_norm=107.239, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.860e-05, train_time=2.733
+[gpub001:0/64] 2023-07-15 18:56:11,597 (trainer:732) INFO: 54epoch:train:9301-9400batch: iter_time=1.083e-04, forward_time=0.147, loss_ctc=66.657, loss_att=51.226, acc=0.713, loss=55.856, backward_time=1.029, grad_norm=120.878, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.860e-05, train_time=2.732
+[gpub001:0/64] 2023-07-15 18:58:28,213 (trainer:732) INFO: 54epoch:train:9401-9500batch: iter_time=1.148e-04, forward_time=0.146, loss_ctc=68.048, loss_att=49.821, acc=0.723, loss=55.289, backward_time=1.032, grad_norm=148.242, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.859e-05, train_time=2.732
+[gpub001:0/64] 2023-07-15 19:00:44,029 (trainer:732) INFO: 54epoch:train:9501-9600batch: iter_time=1.119e-04, forward_time=0.146, loss_ctc=70.255, loss_att=54.181, acc=0.708, loss=59.003, backward_time=1.028, grad_norm=132.345, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.859e-05, train_time=2.716
+[gpub001:0/64] 2023-07-15 19:03:00,419 (trainer:732) INFO: 54epoch:train:9601-9700batch: iter_time=1.089e-04, forward_time=0.146, loss_ctc=66.069, loss_att=50.762, acc=0.716, loss=55.354, backward_time=1.028, grad_norm=135.698, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.858e-05, train_time=2.728
+[gpub001:0/64] 2023-07-15 19:05:16,211 (trainer:732) INFO: 54epoch:train:9701-9800batch: iter_time=1.160e-04, forward_time=0.146, loss_ctc=76.996, loss_att=58.173, acc=0.719, loss=63.820, backward_time=1.029, grad_norm=132.925, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.858e-05, train_time=2.716
+[gpub001:0/64] 2023-07-15 19:07:32,296 (trainer:732) INFO: 54epoch:train:9801-9900batch: iter_time=1.342e-04, forward_time=0.147, loss_ctc=71.876, loss_att=58.022, acc=0.710, loss=62.178, backward_time=1.030, grad_norm=155.420, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.857e-05, train_time=2.721
+[gpub001:0/64] 2023-07-15 19:09:47,953 (trainer:732) INFO: 54epoch:train:9901-10000batch: iter_time=1.147e-04, forward_time=0.146, loss_ctc=72.341, loss_att=53.585, acc=0.723, loss=59.212, backward_time=1.026, grad_norm=135.675, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.857e-05, train_time=2.713
+[gpub001:0/64] 2023-07-15 19:23:03,469 (trainer:338) INFO: 54epoch results: [train] iter_time=0.248, forward_time=0.152, loss_ctc=69.871, loss_att=52.737, acc=0.715, loss=57.878, backward_time=1.034, grad_norm=133.170, clip=100.000, loss_scale=1.474e+32, optim_step_time=0.182, optim0_lr0=4.880e-05, train_time=3.409, time=4 hours, 44 minutes and 19.81 seconds, total_count=510000, gpu_max_cached_mem_GB=37.635, [valid] loss_ctc=41.546, cer_ctc=0.245, loss_att=36.383, acc=0.679, cer=0.423, wer=1.000, loss=37.932, time=7 minutes and 4.7 seconds, total_count=52118, gpu_max_cached_mem_GB=37.635, [att_plot] time=5 minutes and 58.2 seconds, total_count=0, gpu_max_cached_mem_GB=37.635
+[gpub001:0/64] 2023-07-15 19:23:19,345 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub001:0/64] 2023-07-15 19:23:19,357 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/49epoch.pth
+[gpub001:0/64] 2023-07-15 19:23:19,357 (trainer:272) INFO: 55/60epoch started. Estimated time to finish: 1 day, 5 hours and 52 minutes
+[gpub001:0/64] 2023-07-15 19:23:19,377 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
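The epoch-54 summary above reports loss_ctc=69.871, loss_att=52.737, and loss=57.878, which is consistent with the usual hybrid CTC/attention objective loss = w * loss_ctc + (1 - w) * loss_att with w = 0.3; the weight itself is not printed anywhere in this log, so treat it as an inference from the numbers. A back-of-envelope check:

    ctc_weight = 0.3                      # assumed, not printed in this log
    loss_ctc, loss_att = 69.871, 52.737   # 54epoch [train] averages above
    loss = ctc_weight * loss_ctc + (1 - ctc_weight) * loss_att
    print(f"{loss:.3f}")                  # 57.877, vs. the logged loss=57.878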
+[gpub001:0/64] 2023-07-15 19:23:37,050 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 19:23:40,335 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 19:23:40,335 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub001:0/64] 2023-07-15 19:23:40,341 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 19:31:02,897 (trainer:732) INFO: 55epoch:train:1-100batch: iter_time=3.212, forward_time=0.179, loss_ctc=66.161, loss_att=47.370, acc=0.712, loss=53.007, backward_time=1.042, grad_norm=114.013, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=4.856e-05, train_time=9.270
+[gpub001:0/64] 2023-07-15 19:33:19,196 (trainer:732) INFO: 55epoch:train:101-200batch: iter_time=1.151e-04, forward_time=0.146, loss_ctc=80.026, loss_att=57.239, acc=0.710, loss=64.075, backward_time=1.030, grad_norm=155.693, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.856e-05, train_time=2.726
+[gpub001:0/64] 2023-07-15 19:35:36,675 (trainer:732) INFO: 55epoch:train:201-300batch: iter_time=1.160e-04, forward_time=0.146, loss_ctc=70.409, loss_att=49.481, acc=0.714, loss=55.759, backward_time=1.028, grad_norm=126.941, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.856e-05, train_time=2.749
+[gpub001:0/64] 2023-07-15 19:37:52,657 (trainer:732) INFO: 55epoch:train:301-400batch: iter_time=1.215e-04, forward_time=0.146, loss_ctc=73.324, loss_att=56.085, acc=0.696, loss=61.256, backward_time=1.028, grad_norm=136.348, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.855e-05, train_time=2.719
+[gpub001:0/64] 2023-07-15 19:40:11,554 (trainer:732) INFO: 55epoch:train:401-500batch: iter_time=1.186e-04, forward_time=0.145, loss_ctc=68.927, loss_att=51.897, acc=0.701, loss=57.006, backward_time=1.027, grad_norm=132.079, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.855e-05, train_time=2.778
+[gpub001:0/64] 2023-07-15 19:42:27,271 (trainer:732) INFO: 55epoch:train:501-600batch: iter_time=1.254e-04, forward_time=0.145, loss_ctc=70.748, loss_att=54.161, acc=0.713, loss=59.137, backward_time=1.026, grad_norm=129.751, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.854e-05, train_time=2.714
+[gpub001:0/64] 2023-07-15 19:44:43,089 (trainer:732) INFO: 55epoch:train:601-700batch: iter_time=1.243e-04, forward_time=0.146, loss_ctc=70.260, loss_att=56.700, acc=0.709, loss=60.768, backward_time=1.027, grad_norm=134.754, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.854e-05, train_time=2.716
+[gpub001:0/64] 2023-07-15 19:47:02,647 (trainer:732) INFO: 55epoch:train:701-800batch: iter_time=1.169e-04, forward_time=0.159, loss_ctc=61.315, loss_att=47.512, acc=0.708, loss=51.653, backward_time=1.032, grad_norm=119.787, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.853e-05, train_time=2.791
+[gpub001:0/64] 2023-07-15 19:47:56,741 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpub001:0/64] 2023-07-15 19:48:14,565 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 19:48:17,927 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 19:48:17,927 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpub001:0/64] 2023-07-15 19:48:17,934 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 19:52:24,523 (trainer:732) INFO: 55epoch:train:801-900batch: iter_time=1.522, forward_time=0.203, loss_ctc=69.155, loss_att=52.160, acc=0.709, loss=57.258, backward_time=1.045, grad_norm=146.382, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.853e-05, train_time=6.437
+[gpub001:0/64] 2023-07-15 19:54:41,572 (trainer:732) INFO: 55epoch:train:901-1000batch: iter_time=1.320e-04, forward_time=0.146, loss_ctc=70.035, loss_att=56.263, acc=0.706, loss=60.395, backward_time=1.032, grad_norm=129.876, clip=100.000, loss_scale=3.115e+32, optim_step_time=0.182, optim0_lr0=4.852e-05, train_time=2.741
+[gpub001:0/64] 2023-07-15 19:56:57,064 (trainer:732) INFO: 55epoch:train:1001-1100batch: iter_time=1.091e-04, forward_time=0.144, loss_ctc=78.100, loss_att=53.099, acc=0.718, loss=60.599, backward_time=1.025, grad_norm=139.253, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.852e-05, train_time=2.710
+[gpub001:0/64] 2023-07-15 19:59:12,943 (trainer:732) INFO: 55epoch:train:1101-1200batch: iter_time=1.165e-04, forward_time=0.144, loss_ctc=74.777, loss_att=55.606, acc=0.702, loss=61.357, backward_time=1.027, grad_norm=126.416, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.851e-05, train_time=2.717
+[gpub001:0/64] 2023-07-15 20:01:28,825 (trainer:732) INFO: 55epoch:train:1201-1300batch: iter_time=1.217e-04, forward_time=0.145, loss_ctc=69.023, loss_att=53.038, acc=0.705, loss=57.833, backward_time=1.029, grad_norm=128.178, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.851e-05, train_time=2.717
+[gpub001:0/64] 2023-07-15 20:03:45,287 (trainer:732) INFO: 55epoch:train:1301-1400batch: iter_time=1.178e-04, forward_time=0.147, loss_ctc=65.828, loss_att=47.995, acc=0.715, loss=53.345, backward_time=1.029, grad_norm=139.823, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.851e-05, train_time=2.729
+[gpub001:0/64] 2023-07-15 20:04:15,026 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub001:0/64] 2023-07-15 20:06:00,814 (trainer:732) INFO: 55epoch:train:1401-1500batch: iter_time=8.046e-04, forward_time=0.145, loss_ctc=70.081, loss_att=54.160, acc=0.716, loss=58.936, backward_time=1.028, grad_norm=161.967, clip=100.000, loss_scale=1.954e+32, optim_step_time=0.182, optim0_lr0=4.850e-05, train_time=2.710
+[gpub001:0/64] 2023-07-15 20:08:16,706 (trainer:732) INFO: 55epoch:train:1501-1600batch: iter_time=1.300e-04, forward_time=0.148, loss_ctc=65.333, loss_att=50.645, acc=0.717, loss=55.052, backward_time=1.028, grad_norm=125.542, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.850e-05, train_time=2.718
+[gpub001:0/64] 2023-07-15 20:09:58,752 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub001:0/64] 2023-07-15 20:10:16,629 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 20:10:20,102 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 20:10:20,102 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub001:0/64] 2023-07-15 20:10:20,108 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 20:15:05,333 (trainer:732) INFO: 55epoch:train:1601-1700batch: iter_time=2.640, forward_time=0.161, loss_ctc=69.772, loss_att=56.145, acc=0.702, loss=60.233, backward_time=1.045, grad_norm=123.498, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.849e-05, train_time=8.172
+[gpub001:0/64] 2023-07-15 20:17:22,257 (trainer:732) INFO: 55epoch:train:1701-1800batch: iter_time=1.131e-04, forward_time=0.146, loss_ctc=71.715, loss_att=51.174, acc=0.722, loss=57.336, backward_time=1.032, grad_norm=144.148, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.849e-05, train_time=2.738
+[gpub001:0/64] 2023-07-15 20:19:38,257 (trainer:732) INFO: 55epoch:train:1801-1900batch: iter_time=1.163e-04, forward_time=0.146, loss_ctc=71.559, loss_att=54.084, acc=0.725, loss=59.327, backward_time=1.029, grad_norm=122.104, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.848e-05, train_time=2.720
+[gpub001:0/64] 2023-07-15 20:21:54,506 (trainer:732) INFO: 55epoch:train:1901-2000batch: iter_time=1.228e-04, forward_time=0.146, loss_ctc=72.459, loss_att=49.201, acc=0.735, loss=56.178, backward_time=1.029, grad_norm=122.229, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.848e-05, train_time=2.725
+[gpub001:0/64] 2023-07-15 20:24:10,321 (trainer:732) INFO: 55epoch:train:2001-2100batch: iter_time=1.121e-04, forward_time=0.146, loss_ctc=72.859, loss_att=55.435, acc=0.707, loss=60.662, backward_time=1.027, grad_norm=135.154, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.847e-05, train_time=2.716
+[gpub001:0/64] 2023-07-15 20:26:25,933 (trainer:732) INFO: 55epoch:train:2101-2200batch: iter_time=1.085e-04, forward_time=0.145, loss_ctc=69.278, loss_att=52.189, acc=0.717, loss=57.316, backward_time=1.026, grad_norm=183.531, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.847e-05, train_time=2.712
+[gpub001:0/64] 2023-07-15 20:28:41,637 (trainer:732) INFO: 55epoch:train:2201-2300batch: iter_time=1.061e-04, forward_time=0.146, loss_ctc=67.008, loss_att=48.170, acc=0.728, loss=53.821, backward_time=1.027, grad_norm=136.904, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.846e-05, train_time=2.714
+[gpub001:0/64] 2023-07-15 20:30:57,552 (trainer:732) INFO: 55epoch:train:2301-2400batch: iter_time=1.115e-04, forward_time=0.145, loss_ctc=69.173, loss_att=54.867, acc=0.725, loss=59.159, backward_time=1.028, grad_norm=129.937, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.846e-05, train_time=2.718
+[gpub001:0/64] 2023-07-15 20:33:13,592 (trainer:732) INFO: 55epoch:train:2401-2500batch: iter_time=1.065e-04, forward_time=0.146, loss_ctc=64.746, loss_att=49.739, acc=0.715, loss=54.241, backward_time=1.028, grad_norm=119.142, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.846e-05, train_time=2.721
+[gpub001:0/64] 2023-07-15 20:33:16,890 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub001:0/64] 2023-07-15 20:33:34,658 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 20:33:38,071 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 20:33:38,071 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub001:0/64] 2023-07-15 20:33:38,078 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 20:39:15,364 (trainer:732) INFO: 55epoch:train:2501-2600batch: iter_time=1.324, forward_time=0.155, loss_ctc=75.159, loss_att=52.705, acc=0.723, loss=59.441, backward_time=1.050, grad_norm=172.779, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.845e-05, train_time=7.235
+[gpub001:0/64] 2023-07-15 20:41:32,183 (trainer:732) INFO: 55epoch:train:2601-2700batch: iter_time=1.182e-04, forward_time=0.147, loss_ctc=69.915, loss_att=53.588, acc=0.721, loss=58.486, backward_time=1.031, grad_norm=134.920, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.845e-05, train_time=2.736
+[gpub001:0/64] 2023-07-15 20:43:48,482 (trainer:732) INFO: 55epoch:train:2701-2800batch: iter_time=1.217e-04, forward_time=0.147, loss_ctc=76.857, loss_att=50.857, acc=0.733, loss=58.657, backward_time=1.031, grad_norm=125.091, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.844e-05, train_time=2.726
+[gpub001:0/64] 2023-07-15 20:46:04,499 (trainer:732) INFO: 55epoch:train:2801-2900batch: iter_time=1.144e-04, forward_time=0.146, loss_ctc=70.084, loss_att=55.592, acc=0.713, loss=59.939, backward_time=1.030, grad_norm=156.149, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.844e-05, train_time=2.720
+[gpub001:0/64] 2023-07-15 20:48:20,501 (trainer:732) INFO: 55epoch:train:2901-3000batch: iter_time=1.091e-04, forward_time=0.146, loss_ctc=65.972, loss_att=49.663, acc=0.717, loss=54.556, backward_time=1.030, grad_norm=162.432, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.843e-05, train_time=2.720
+[gpub001:0/64] 2023-07-15 20:50:45,633 (trainer:732) INFO: 55epoch:train:3001-3100batch: iter_time=1.104e-04, forward_time=0.194, loss_ctc=66.366, loss_att=46.152, acc=0.727, loss=52.216, backward_time=1.050, grad_norm=136.185, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.189, optim0_lr0=4.843e-05, train_time=2.901
+[gpub001:0/64] 2023-07-15 20:53:05,403 (trainer:732) INFO: 55epoch:train:3101-3200batch: iter_time=1.162e-04, forward_time=0.173, loss_ctc=72.329, loss_att=57.968, acc=0.722, loss=62.276, backward_time=1.030, grad_norm=129.959, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.842e-05, train_time=2.796
+[gpub001:0/64] 2023-07-15 20:55:21,845 (trainer:732) INFO: 55epoch:train:3201-3300batch: iter_time=1.217e-04, forward_time=0.146, loss_ctc=60.730, loss_att=48.110, acc=0.725, loss=51.896, backward_time=1.030, grad_norm=110.140, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.842e-05, train_time=2.729
+[gpub001:0/64] 2023-07-15 20:56:24,324 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub001:0/64] 2023-07-15 20:56:42,220 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 20:56:45,641 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 20:56:45,641 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpub001:0/64] 2023-07-15 20:56:45,671 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 21:01:16,198 (trainer:732) INFO: 55epoch:train:3301-3400batch: iter_time=2.037, forward_time=0.159, loss_ctc=65.648, loss_att=46.081, acc=0.723, loss=51.951, backward_time=1.054, grad_norm=141.567, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.841e-05, train_time=7.087
+[gpub001:0/64] 2023-07-15 21:03:32,649 (trainer:732) INFO: 55epoch:train:3401-3500batch: iter_time=1.188e-04, forward_time=0.146, loss_ctc=68.666, loss_att=55.199, acc=0.722, loss=59.239, backward_time=1.030, grad_norm=148.789, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.841e-05, train_time=2.729
+[gpub001:0/64] 2023-07-15 21:05:48,957 (trainer:732) INFO: 55epoch:train:3501-3600batch: iter_time=1.139e-04, forward_time=0.147, loss_ctc=75.185, loss_att=50.223, acc=0.733, loss=57.711, backward_time=1.031, grad_norm=142.225, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.841e-05, train_time=2.726
+[gpub001:0/64] 2023-07-15 21:08:05,090 (trainer:732) INFO: 55epoch:train:3601-3700batch: iter_time=1.175e-04, forward_time=0.147, loss_ctc=72.633, loss_att=54.354, acc=0.712, loss=59.838, backward_time=1.029, grad_norm=156.626, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.840e-05, train_time=2.722
+[gpub001:0/64] 2023-07-15 21:10:21,444 (trainer:732) INFO: 55epoch:train:3701-3800batch: iter_time=1.204e-04, forward_time=0.147, loss_ctc=69.561, loss_att=51.973, acc=0.725, loss=57.249, backward_time=1.030, grad_norm=129.593, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.840e-05, train_time=2.727
+[gpub001:0/64] 2023-07-15 21:12:37,572 (trainer:732) INFO: 55epoch:train:3801-3900batch: iter_time=1.255e-04, forward_time=0.146, loss_ctc=64.566, loss_att=47.497, acc=0.722, loss=52.618, backward_time=1.027, grad_norm=133.138, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.839e-05, train_time=2.722
+[gpub001:0/64] 2023-07-15 21:14:53,769 (trainer:732) INFO: 55epoch:train:3901-4000batch: iter_time=1.269e-04, forward_time=0.146, loss_ctc=67.393, loss_att=53.835, acc=0.726, loss=57.902, backward_time=1.029, grad_norm=140.927, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.839e-05, train_time=2.724
+[gpub001:0/64] 2023-07-15 21:17:09,543 (trainer:732) INFO: 55epoch:train:4001-4100batch: iter_time=1.338e-04, forward_time=0.146, loss_ctc=65.438, loss_att=51.648, acc=0.716, loss=55.785, backward_time=1.027, grad_norm=112.494, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.838e-05, train_time=2.715
+[gpub001:0/64] 2023-07-15 21:18:46,330 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub001:0/64] 2023-07-15 21:19:04,591 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 21:19:08,060 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-15 21:19:08,060 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpub001:0/64] 2023-07-15 21:19:08,066 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 21:23:46,402 (trainer:732) INFO: 55epoch:train:4101-4200batch: iter_time=1.571, forward_time=0.171, loss_ctc=69.400, loss_att=56.671, acc=0.707, loss=60.489, backward_time=1.040, grad_norm=139.666, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=4.838e-05, train_time=7.937
+[gpub001:0/64] 2023-07-15 21:26:03,353 (trainer:732) INFO: 55epoch:train:4201-4300batch: iter_time=1.202e-04, forward_time=0.146, loss_ctc=71.370, loss_att=51.067, acc=0.719, loss=57.158, backward_time=1.033, grad_norm=124.956, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.837e-05, train_time=2.739
+[gpub001:0/64] 2023-07-15 21:28:19,368 (trainer:732) INFO: 55epoch:train:4301-4400batch: iter_time=1.251e-04, forward_time=0.147, loss_ctc=70.788, loss_att=55.523, acc=0.716, loss=60.102, backward_time=1.030, grad_norm=145.654, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.837e-05, train_time=2.720
+[gpub001:0/64] 2023-07-15 21:30:35,376 (trainer:732) INFO: 55epoch:train:4401-4500batch: iter_time=1.251e-04, forward_time=0.146, loss_ctc=73.170, loss_att=49.309, acc=0.727, loss=56.467, backward_time=1.030, grad_norm=177.689, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.836e-05, train_time=2.720
+[gpub001:0/64] 2023-07-15 21:32:57,674 (trainer:732) INFO: 55epoch:train:4501-4600batch: iter_time=1.231e-04, forward_time=0.170, loss_ctc=71.775, loss_att=55.086, acc=0.701, loss=60.093, backward_time=1.065, grad_norm=138.671, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=4.836e-05, train_time=2.845
+[gpub001:0/64] 2023-07-15 21:35:18,977 (trainer:732) INFO: 55epoch:train:4601-4700batch: iter_time=1.292e-04, forward_time=0.163, loss_ctc=68.115, loss_att=52.067, acc=0.705, loss=56.882, backward_time=1.030, grad_norm=146.040, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.836e-05, train_time=2.826
+[gpub001:0/64] 2023-07-15 21:37:37,938 (trainer:732) INFO: 55epoch:train:4701-4800batch: iter_time=1.370e-04, forward_time=0.146, loss_ctc=67.563, loss_att=48.505, acc=0.725, loss=54.222, backward_time=1.041, grad_norm=114.887, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.835e-05, train_time=2.779
+[gpub001:0/64] 2023-07-15 21:39:53,753 (trainer:732) INFO: 55epoch:train:4801-4900batch: iter_time=1.201e-04, forward_time=0.146, loss_ctc=69.293, loss_att=55.110, acc=0.720, loss=59.365, backward_time=1.028, grad_norm=141.733, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.835e-05, train_time=2.715
+[gpub001:0/64] 2023-07-15 21:42:09,547 (trainer:732) INFO: 55epoch:train:4901-5000batch: iter_time=1.178e-04, forward_time=0.146, loss_ctc=62.874, loss_att=48.554, acc=0.716, loss=52.850, backward_time=1.029, grad_norm=127.693, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.834e-05, train_time=2.717
+[gpub001:0/64] 2023-07-15 21:42:13,217 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub001:0/64] 2023-07-15 21:42:31,219 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 21:42:34,649 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+  preprocess: )
+[gpub001:0/64] 2023-07-15 21:42:34,649 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub001:0/64] 2023-07-15 21:42:34,655 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 21:48:26,273 (trainer:732) INFO: 55epoch:train:5001-5100batch: iter_time=1.355, forward_time=0.182, loss_ctc=63.212, loss_att=44.533, acc=0.732, loss=50.136, backward_time=1.043, grad_norm=148.605, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.834e-05, train_time=7.534
+[gpub001:0/64] 2023-07-15 21:50:42,538 (trainer:732) INFO: 55epoch:train:5101-5200batch: iter_time=9.720e-05, forward_time=0.144, loss_ctc=75.917, loss_att=55.962, acc=0.723, loss=61.949, backward_time=1.030, grad_norm=125.059, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=4.833e-05, train_time=2.725
+[gpub001:0/64] 2023-07-15 21:52:58,854 (trainer:732) INFO: 55epoch:train:5201-5300batch: iter_time=8.952e-05, forward_time=0.144, loss_ctc=68.540, loss_att=46.952, acc=0.732, loss=53.429, backward_time=1.030, grad_norm=136.243, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.833e-05, train_time=2.726
+[gpub001:0/64] 2023-07-15 21:55:19,419 (trainer:732) INFO: 55epoch:train:5301-5400batch: iter_time=8.648e-05, forward_time=0.144, loss_ctc=72.817, loss_att=54.724, acc=0.714, loss=60.152, backward_time=1.034, grad_norm=142.694, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=4.832e-05, train_time=2.811
+[gpub001:0/64] 2023-07-15 21:57:36,018 (trainer:732) INFO: 55epoch:train:5401-5500batch: iter_time=9.088e-05, forward_time=0.144, loss_ctc=68.000, loss_att=50.279, acc=0.722, loss=55.595, backward_time=1.030, grad_norm=120.000, clip=100.000, loss_scale=2.888e+32, optim_step_time=0.182, optim0_lr0=4.832e-05, train_time=2.732
+[gpub001:0/64] 2023-07-15 21:59:52,599 (trainer:732) INFO: 55epoch:train:5501-5600batch: iter_time=9.922e-05, forward_time=0.145, loss_ctc=70.562, loss_att=53.030, acc=0.727, loss=58.290, backward_time=1.031, grad_norm=151.714, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.831e-05, train_time=2.731
+[gpub001:0/64] 2023-07-15 22:02:08,924 (trainer:732) INFO: 55epoch:train:5601-5700batch: iter_time=1.006e-04, forward_time=0.144, loss_ctc=67.520, loss_att=56.170, acc=0.720, loss=59.575, backward_time=1.030, grad_norm=179.978, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.831e-05, train_time=2.726
+[gpub001:0/64] 2023-07-15 22:04:27,557 (trainer:732) INFO: 55epoch:train:5701-5800batch: iter_time=9.311e-05, forward_time=0.144, loss_ctc=60.189, loss_att=46.927, acc=0.714, loss=50.906, backward_time=1.031, grad_norm=118.818, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.831e-05, train_time=2.772
+[gpub001:0/64] 2023-07-15 22:05:31,121 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub001:0/64] 2023-07-15 22:05:48,967 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 22:05:52,430 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+  preprocess: )
+[gpub001:0/64] 2023-07-15 22:05:52,430 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub001:0/64] 2023-07-15 22:05:52,437 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 22:12:10,087 (trainer:732) INFO: 55epoch:train:5801-5900batch: iter_time=3.061, forward_time=0.185, loss_ctc=75.455, loss_att=58.296, acc=0.718, loss=63.444, backward_time=1.105, grad_norm=123.117, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=4.830e-05, train_time=9.250
+[gpub001:0/64] 2023-07-15 22:14:27,175 (trainer:732) INFO: 55epoch:train:5901-6000batch: iter_time=1.074e-04, forward_time=0.145, loss_ctc=68.118, loss_att=46.658, acc=0.734, loss=53.096, backward_time=1.031, grad_norm=177.767, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.830e-05, train_time=2.742
+[gpub001:0/64] 2023-07-15 22:16:43,727 (trainer:732) INFO: 55epoch:train:6001-6100batch: iter_time=1.079e-04, forward_time=0.146, loss_ctc=79.566, loss_att=57.355, acc=0.725, loss=64.019, backward_time=1.031, grad_norm=166.617, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.829e-05, train_time=2.731
+[gpub001:0/64] 2023-07-15 22:18:59,637 (trainer:732) INFO: 55epoch:train:6101-6200batch: iter_time=1.009e-04, forward_time=0.144, loss_ctc=69.002, loss_att=52.731, acc=0.722, loss=57.612, backward_time=1.029, grad_norm=119.107, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.829e-05, train_time=2.718
+[gpub001:0/64] 2023-07-15 22:21:16,210 (trainer:732) INFO: 55epoch:train:6201-6300batch: iter_time=1.116e-04, forward_time=0.145, loss_ctc=68.288, loss_att=50.084, acc=0.721, loss=55.545, backward_time=1.030, grad_norm=139.701, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.828e-05, train_time=2.731
+[gpub001:0/64] 2023-07-15 22:23:44,539 (trainer:732) INFO: 55epoch:train:6301-6400batch: iter_time=1.056e-04, forward_time=0.144, loss_ctc=67.344, loss_att=48.425, acc=0.728, loss=54.101, backward_time=1.045, grad_norm=133.354, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.828e-05, train_time=2.966
+[gpub001:0/64] 2023-07-15 22:26:07,503 (trainer:732) INFO: 55epoch:train:6401-6500batch: iter_time=1.176e-04, forward_time=0.152, loss_ctc=72.129, loss_att=56.674, acc=0.724, loss=61.311, backward_time=1.051, grad_norm=128.706, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=4.827e-05, train_time=2.859
+[gpub001:0/64] 2023-07-15 22:28:29,981 (trainer:732) INFO: 55epoch:train:6501-6600batch: iter_time=1.072e-04, forward_time=0.146, loss_ctc=62.301, loss_att=49.950, acc=0.726, loss=53.655, backward_time=1.040, grad_norm=125.862, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.827e-05, train_time=2.849
+[gpub001:0/64] 2023-07-15 22:30:06,709 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub001:0/64] 2023-07-15 22:30:25,064 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 22:30:28,526 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+  preprocess: )
+[gpub001:0/64] 2023-07-15 22:30:28,526 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub001:0/64] 2023-07-15 22:30:28,532 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 22:36:04,422 (trainer:732) INFO: 55epoch:train:6601-6700batch: iter_time=1.567, forward_time=0.148, loss_ctc=66.536, loss_att=49.542, acc=0.714, loss=54.640, backward_time=1.031, grad_norm=147.774, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.827e-05, train_time=9.089
+[gpub001:0/64] 2023-07-15 22:38:24,208 (trainer:732) INFO: 55epoch:train:6701-6800batch: iter_time=1.030e-04, forward_time=0.167, loss_ctc=70.272, loss_att=49.751, acc=0.724, loss=55.907, backward_time=1.037, grad_norm=112.465, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=4.826e-05, train_time=2.796
+[gpub001:0/64] 2023-07-15 22:40:41,823 (trainer:732) INFO: 55epoch:train:6801-6900batch: iter_time=9.641e-05, forward_time=0.146, loss_ctc=70.101, loss_att=53.067, acc=0.720, loss=58.177, backward_time=1.032, grad_norm=122.968, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.826e-05, train_time=2.752
+[gpub001:0/64] 2023-07-15 22:42:59,328 (trainer:732) INFO: 55epoch:train:6901-7000batch: iter_time=9.366e-05, forward_time=0.145, loss_ctc=72.478, loss_att=49.337, acc=0.730, loss=56.279, backward_time=1.029, grad_norm=129.336, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.825e-05, train_time=2.750
+[gpub001:0/64] 2023-07-15 22:45:16,265 (trainer:732) INFO: 55epoch:train:7001-7100batch: iter_time=1.199e-04, forward_time=0.145, loss_ctc=72.021, loss_att=55.230, acc=0.700, loss=60.268, backward_time=1.029, grad_norm=126.971, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.825e-05, train_time=2.739
+[gpub001:0/64] 2023-07-15 22:47:32,473 (trainer:732) INFO: 55epoch:train:7101-7200batch: iter_time=1.102e-04, forward_time=0.145, loss_ctc=67.237, loss_att=51.993, acc=0.709, loss=56.566, backward_time=1.029, grad_norm=119.930, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.824e-05, train_time=2.724
+[gpub001:0/64] 2023-07-15 22:49:48,357 (trainer:732) INFO: 55epoch:train:7201-7300batch: iter_time=1.017e-04, forward_time=0.145, loss_ctc=67.345, loss_att=47.923, acc=0.726, loss=53.750, backward_time=1.029, grad_norm=118.761, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.824e-05, train_time=2.717
+[gpub001:0/64] 2023-07-15 22:52:04,142 (trainer:732) INFO: 55epoch:train:7301-7400batch: iter_time=1.125e-04, forward_time=0.144, loss_ctc=68.462, loss_att=53.569, acc=0.725, loss=58.037, backward_time=1.028, grad_norm=138.314, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.823e-05, train_time=2.715
+[gpub001:0/64] 2023-07-15 22:54:24,008 (trainer:732) INFO: 55epoch:train:7401-7500batch: iter_time=1.110e-04, forward_time=0.145, loss_ctc=63.726, loss_att=49.352, acc=0.717, loss=53.664, backward_time=1.035, grad_norm=118.056, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.823e-05, train_time=2.797
+[gpub001:0/64] 2023-07-15 22:54:29,095 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub001:0/64] 2023-07-15 22:54:47,130 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 22:54:50,600 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+  preprocess: )
+[gpub001:0/64] 2023-07-15 22:54:50,600 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub001:0/64] 2023-07-15 22:54:50,606 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 22:59:53,940 (trainer:732) INFO: 55epoch:train:7501-7600batch: iter_time=1.342, forward_time=0.179, loss_ctc=75.400, loss_att=51.040, acc=0.730, loss=58.348, backward_time=1.045, grad_norm=136.571, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.822e-05, train_time=6.598
+[gpub001:0/64] 2023-07-15 23:02:10,577 (trainer:732) INFO: 55epoch:train:7601-7700batch: iter_time=1.152e-04, forward_time=0.146, loss_ctc=69.202, loss_att=52.650, acc=0.727, loss=57.616, backward_time=1.028, grad_norm=137.867, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.822e-05, train_time=2.733
+[gpub001:0/64] 2023-07-15 23:04:27,339 (trainer:732) INFO: 55epoch:train:7701-7800batch: iter_time=1.118e-04, forward_time=0.147, loss_ctc=75.729, loss_att=50.295, acc=0.738, loss=57.925, backward_time=1.033, grad_norm=129.910, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.822e-05, train_time=2.735
+[gpub001:0/64] 2023-07-15 23:06:43,267 (trainer:732) INFO: 55epoch:train:7801-7900batch: iter_time=1.329e-04, forward_time=0.147, loss_ctc=68.754, loss_att=54.027, acc=0.720, loss=58.445, backward_time=1.030, grad_norm=130.834, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.821e-05, train_time=2.718
+[gpub001:0/64] 2023-07-15 23:09:14,681 (trainer:732) INFO: 55epoch:train:7901-8000batch: iter_time=1.080e-04, forward_time=0.146, loss_ctc=65.383, loss_att=49.653, acc=0.720, loss=54.372, backward_time=1.082, grad_norm=143.167, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.821e-05, train_time=3.028
+[gpub001:0/64] 2023-07-15 23:11:49,953 (trainer:732) INFO: 55epoch:train:8001-8100batch: iter_time=1.179e-04, forward_time=0.148, loss_ctc=65.409, loss_att=44.547, acc=0.734, loss=50.806, backward_time=1.053, grad_norm=133.133, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.820e-05, train_time=3.105
+[gpub001:0/64] 2023-07-15 23:14:06,705 (trainer:732) INFO: 55epoch:train:8101-8200batch: iter_time=1.099e-04, forward_time=0.148, loss_ctc=71.658, loss_att=56.942, acc=0.730, loss=61.357, backward_time=1.035, grad_norm=138.515, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.820e-05, train_time=2.735
+[gpub001:0/64] 2023-07-15 23:16:22,766 (trainer:732) INFO: 55epoch:train:8201-8300batch: iter_time=1.156e-04, forward_time=0.146, loss_ctc=60.074, loss_att=47.491, acc=0.727, loss=51.266, backward_time=1.030, grad_norm=119.828, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.819e-05, train_time=2.721
+[gpub001:0/64] 2023-07-15 23:17:21,807 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub001:0/64] 2023-07-15 23:17:40,310 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 23:17:44,092 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+  preprocess: )
+[gpub001:0/64] 2023-07-15 23:17:44,092 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpub001:0/64] 2023-07-15 23:17:44,098 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 23:22:06,581 (trainer:732) INFO: 55epoch:train:8301-8400batch: iter_time=1.931, forward_time=0.162, loss_ctc=70.513, loss_att=50.076, acc=0.728, loss=56.207, backward_time=1.053, grad_norm=120.350, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.819e-05, train_time=6.876
+[gpub001:0/64] 2023-07-15 23:24:24,068 (trainer:732) INFO: 55epoch:train:8401-8500batch: iter_time=1.156e-04, forward_time=0.146, loss_ctc=66.708, loss_att=47.369, acc=0.724, loss=53.171, backward_time=1.031, grad_norm=116.972, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.818e-05, train_time=2.750
+[gpub001:0/64] 2023-07-15 23:26:42,851 (trainer:732) INFO: 55epoch:train:8501-8600batch: iter_time=1.086e-04, forward_time=0.167, loss_ctc=78.244, loss_att=58.054, acc=0.714, loss=64.111, backward_time=1.031, grad_norm=173.126, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.818e-05, train_time=2.775
+[gpub001:0/64] 2023-07-15 23:29:02,728 (trainer:732) INFO: 55epoch:train:8601-8700batch: iter_time=1.328e-04, forward_time=0.155, loss_ctc=69.989, loss_att=54.707, acc=0.715, loss=59.291, backward_time=1.031, grad_norm=149.360, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.818e-05, train_time=2.797
+[gpub001:0/64] 2023-07-15 23:31:26,010 (trainer:732) INFO: 55epoch:train:8701-8800batch: iter_time=1.131e-04, forward_time=0.196, loss_ctc=67.734, loss_att=49.675, acc=0.708, loss=55.093, backward_time=1.032, grad_norm=151.352, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.199, optim0_lr0=4.817e-05, train_time=2.865
+[gpub001:0/64] 2023-07-15 23:33:45,012 (trainer:732) INFO: 55epoch:train:8801-8900batch: iter_time=1.067e-04, forward_time=0.145, loss_ctc=66.717, loss_att=48.204, acc=0.719, loss=53.758, backward_time=1.032, grad_norm=131.081, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.817e-05, train_time=2.780
+[gpub001:0/64] 2023-07-15 23:36:00,960 (trainer:732) INFO: 55epoch:train:8901-9000batch: iter_time=1.112e-04, forward_time=0.146, loss_ctc=70.848, loss_att=55.620, acc=0.725, loss=60.188, backward_time=1.029, grad_norm=143.176, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.816e-05, train_time=2.719
+[gpub001:0/64] 2023-07-15 23:38:18,241 (trainer:732) INFO: 55epoch:train:9001-9100batch: iter_time=1.011e-04, forward_time=0.150, loss_ctc=62.371, loss_att=49.098, acc=0.726, loss=53.080, backward_time=1.032, grad_norm=147.200, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.816e-05, train_time=2.745
+[gpub001:0/64] 2023-07-15 23:40:07,256 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub001:0/64] 2023-07-15 23:40:25,764 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-15 23:40:29,558 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+  preprocess: )
+[gpub001:0/64] 2023-07-15 23:40:29,558 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub001:0/64] 2023-07-15 23:40:29,564 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-15 23:45:21,086 (trainer:732) INFO: 55epoch:train:9101-9200batch: iter_time=2.793, forward_time=0.191, loss_ctc=66.256, loss_att=48.196, acc=0.723, loss=53.614, backward_time=1.044, grad_norm=112.598, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=4.815e-05, train_time=8.457
+[gpub001:0/64] 2023-07-15 23:47:43,332 (trainer:732) INFO: 55epoch:train:9201-9300batch: iter_time=9.859e-05, forward_time=0.145, loss_ctc=70.085, loss_att=49.743, acc=0.725, loss=55.846, backward_time=1.042, grad_norm=152.031, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.815e-05, train_time=2.845
+[gpub001:0/64] 2023-07-15 23:50:02,620 (trainer:732) INFO: 55epoch:train:9301-9400batch: iter_time=9.019e-05, forward_time=0.145, loss_ctc=69.933, loss_att=52.729, acc=0.721, loss=57.890, backward_time=1.040, grad_norm=139.705, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=4.814e-05, train_time=2.786
+[gpub001:0/64] 2023-07-15 23:52:24,700 (trainer:732) INFO: 55epoch:train:9401-9500batch: iter_time=1.298e-04, forward_time=0.146, loss_ctc=72.083, loss_att=48.816, acc=0.733, loss=55.796, backward_time=1.032, grad_norm=144.424, clip=100.000, loss_scale=5.776e+32, optim_step_time=0.181, optim0_lr0=4.814e-05, train_time=2.841
+[gpub001:0/64] 2023-07-15 23:54:49,710 (trainer:732) INFO: 55epoch:train:9501-9600batch: iter_time=1.283e-04, forward_time=0.196, loss_ctc=70.036, loss_att=53.272, acc=0.706, loss=58.301, backward_time=1.036, grad_norm=201.540, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.186, optim0_lr0=4.814e-05, train_time=2.899
+[gpub001:0/64] 2023-07-15 23:57:14,693 (trainer:732) INFO: 55epoch:train:9601-9700batch: iter_time=1.226e-04, forward_time=0.148, loss_ctc=67.979, loss_att=52.018, acc=0.708, loss=56.807, backward_time=1.042, grad_norm=195.141, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=4.813e-05, train_time=2.900
+[gpub001:0/64] 2023-07-15 23:59:36,152 (trainer:732) INFO: 55epoch:train:9701-9800batch: iter_time=1.320e-04, forward_time=0.146, loss_ctc=68.810, loss_att=49.128, acc=0.729, loss=55.032, backward_time=1.052, grad_norm=143.908, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=4.813e-05, train_time=2.829
+[gpub001:0/64] 2023-07-16 00:02:00,185 (trainer:732) INFO: 55epoch:train:9801-9900batch: iter_time=1.242e-04, forward_time=0.147, loss_ctc=66.852, loss_att=52.866, acc=0.723, loss=57.062, backward_time=1.040, grad_norm=130.349, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=4.812e-05, train_time=2.881
+[gpub001:0/64] 2023-07-16 00:04:16,085 (trainer:732) INFO: 55epoch:train:9901-10000batch: iter_time=1.235e-04, forward_time=0.147, loss_ctc=62.767, loss_att=47.965, acc=0.721, loss=52.406, backward_time=1.029, grad_norm=113.224, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=4.812e-05, train_time=2.718
+[gpub001:0/64] 2023-07-16 00:16:37,923 (trainer:338) INFO: 55epoch results: [train] iter_time=0.244, forward_time=0.152, loss_ctc=69.296, loss_att=51.744, acc=0.719, loss=57.010, backward_time=1.035, grad_norm=137.602, clip=100.000, loss_scale=2.636e+32, optim_step_time=0.182, optim0_lr0=4.834e-05, train_time=3.371, time=4 hours, 41 minutes and 10.78 seconds, total_count=520000, gpu_max_cached_mem_GB=37.635, [valid] loss_ctc=42.158, cer_ctc=0.245, loss_att=35.877, acc=0.700, cer=0.361, wer=0.989, loss=37.761, time=6 minutes and 6.77 seconds, total_count=53130, gpu_max_cached_mem_GB=37.635, [att_plot] time=6 minutes and 0.96 seconds, total_count=0, gpu_max_cached_mem_GB=37.635
+[gpub001:0/64] 2023-07-16 00:16:57,201 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub001:0/64] 2023-07-16 00:16:57,394 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/38epoch.pth, exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/50epoch.pth
+[gpub001:0/64] 2023-07-16 00:16:57,395 (trainer:272) INFO: 56/60epoch started. Estimated time to finish: 1 day, 49 minutes and 46.32 seconds
+[gpub001:0/64] 2023-07-16 00:16:59,197 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub001:0/64] 2023-07-16 00:17:17,371 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-16 00:17:22,642 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+  preprocess: )
+[gpub001:0/64] 2023-07-16 00:17:22,642 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpub001:0/64] 2023-07-16 00:17:22,740 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-16 00:25:03,887 (trainer:732) INFO: 56epoch:train:1-100batch: iter_time=3.403, forward_time=0.191, loss_ctc=67.004, loss_att=52.989, acc=0.697, loss=57.193, backward_time=1.047, grad_norm=134.202, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=4.811e-05, train_time=9.711
+[gpub001:0/64] 2023-07-16 00:26:04,353 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub001:0/64] 2023-07-16 00:27:28,104 (trainer:732) INFO: 56epoch:train:101-200batch: iter_time=9.493e-05, forward_time=0.145, loss_ctc=68.220, loss_att=50.035, acc=0.712, loss=55.491, backward_time=1.051, grad_norm=141.648, clip=100.000, loss_scale=4.570e+32, optim_step_time=0.182, optim0_lr0=4.811e-05, train_time=2.884
+[gpub001:0/64] 2023-07-16 00:29:45,320 (trainer:732) INFO: 56epoch:train:201-300batch: iter_time=1.056e-04, forward_time=0.143, loss_ctc=82.452, loss_att=59.724, acc=0.705, loss=66.542, backward_time=1.029, grad_norm=146.750, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.810e-05, train_time=2.744
+[gpub001:0/64] 2023-07-16 00:32:02,788 (trainer:732) INFO: 56epoch:train:301-400batch: iter_time=9.678e-05, forward_time=0.144, loss_ctc=72.797, loss_att=51.224, acc=0.713, loss=57.695, backward_time=1.028, grad_norm=135.271, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.810e-05, train_time=2.749
+[gpub001:0/64] 2023-07-16 00:34:19,996 (trainer:732) INFO: 56epoch:train:401-500batch: iter_time=1.025e-04, forward_time=0.145, loss_ctc=64.462, loss_att=47.713, acc=0.715, loss=52.738, backward_time=1.028, grad_norm=123.919, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.810e-05, train_time=2.744
+[gpub001:0/64] 2023-07-16 00:36:48,635 (trainer:732) INFO: 56epoch:train:501-600batch: iter_time=2.047e-04, forward_time=0.231, loss_ctc=68.197, loss_att=52.344, acc=0.718, loss=57.100, backward_time=1.043, grad_norm=143.676, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.186, optim0_lr0=4.809e-05, train_time=2.972
+[gpub001:0/64] 2023-07-16 00:39:11,940 (trainer:732) INFO: 56epoch:train:601-700batch: iter_time=7.600e-04, forward_time=0.198, loss_ctc=77.810, loss_att=56.459, acc=0.705, loss=62.865, backward_time=1.034, grad_norm=136.066, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.186, optim0_lr0=4.809e-05, train_time=2.866
+[gpub001:0/64] 2023-07-16 00:41:30,733 (trainer:732) INFO: 56epoch:train:701-800batch: iter_time=9.654e-05, forward_time=0.146, loss_ctc=68.615, loss_att=51.269, acc=0.712, loss=56.472, backward_time=1.030, grad_norm=132.539, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=4.808e-05, train_time=2.776
+srun: Job step aborted: Waiting up to 32 seconds for job step to finish.
+slurmstepd: error: *** STEP 2157595.0 ON gpub001 CANCELLED AT 2023-07-16T00:41:51 ***
diff --git a/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.10.log b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.10.log
new file mode 100644
index 0000000000000000000000000000000000000000..96816758eecc0dfd3614fbd93317422e486ba465
--- /dev/null
+++ b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.10.log
@@ -0,0 +1,4404 @@
+# Running on gpua014.delta.ncsa.illinois.edu
+# Started at Mon Jul 3 02:21:56 CDT 2023
+# SLURMD_NODENAME=gpua014
+# SLURM_CLUSTER_NAME=delta
+# SLURM_CONF=/var/spool/slurmd/conf-cache/slurm.conf
+# SLURM_CPUS_ON_NODE=64
+# SLURM_CPUS_PER_TASK=64
+# SLURM_EXPORT_ENV=PATH
+# SLURM_GET_USER_ENV=1
+# SLURM_GPUS_ON_NODE=4
+# SLURM_GTIDS=0
+# SLURM_JOBID=2118951
+# SLURM_JOB_ACCOUNT=bbjs-delta-gpu
+# SLURM_JOB_CPUS_PER_NODE='64(x16)'
+# SLURM_JOB_GID=202
+# SLURM_JOB_GPUS=0,1,2,3
+# SLURM_JOB_ID=2118951
+# SLURM_JOB_NAME=exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.log
+# SLURM_JOB_NODELIST='gpua[014-016,018,020-022,041,060,062-063,068,088,091,093,096]'
+# SLURM_JOB_NUM_NODES=16
+# SLURM_JOB_PARTITION=gpuA100x4
+# SLURM_JOB_QOS=bbjs-delta-gpu
+# SLURM_JOB_UID=68077
+# SLURM_JOB_USER=peng6
+# SLURM_LOCALID=0
+# SLURM_MEM_PER_NODE=240000
+# SLURM_NNODES=16
+# SLURM_NODEID=0
+# SLURM_NODELIST='gpua[014-016,018,020-022,041,060,062-063,068,088,091,093,096]'
+# SLURM_NODE_ALIASES='(null)'
+# SLURM_OPEN_MODE=a
+# SLURM_PRIO_PROCESS=0
+# SLURM_PROCID=0
+# SLURM_SUBMIT_DIR=/scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1
+# SLURM_SUBMIT_HOST=dt-login02.delta.internal.ncsa.edu
+# SLURM_TASKS_PER_NODE='1(x16)'
+# SLURM_TASK_PID=1504675
+# SLURM_TOPOLOGY_ADDR=ss00.ss05.gpua014
+# SLURM_TOPOLOGY_ADDR_PATTERN=switch.switch.node
+# SLURM_WORKING_CLUSTER=delta:dt-sched:6817:9728:109
+# srun --export=ALL python3 -m espnet2.bin.s2t_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_5aa952dd-4105-4ef3-9df5-4299dfe3670d
+/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_5aa952dd-4105-4ef3-9df5-4299dfe3670d
+ibuted true --dist_launcher slurm --dist_init_method
file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_5aa952dd-4105-4ef3-9df5-4299dfe3670d +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_5aa952dd-4105-4ef3-9df5-4299dfe3670d +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_5aa952dd-4105-4ef3-9df5-4299dfe3670d +[gpua014:0/64] 2023-07-03 02:24:21,574 (distributed_c10d:319) INFO: Added key: store_based_barrier_key:1 to store for rank: 0 +[gpua014:0/64] 2023-07-03 02:24:24,018 (distributed_c10d:353) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 64 nodes. 
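[Editor's note] The two distributed_c10d messages above record torch.distributed's store-based barrier: each of the 64 ranks registers the same key in a shared store, and rank 0 reports completion once all have checked in. A minimal sketch of the file-based rendezvous implied by --dist_init_method file://... follows; it is an illustration, not the ESPnet launcher itself, and the init file path is hypothetical (ranks are assumed to come from the SLURM environment as srun exports them).

import os
import torch
import torch.distributed as dist

def init_distributed(init_file: str) -> None:
    # Derive this process's rank and the total world size from SLURM.
    rank = int(os.environ.get("SLURM_PROCID", "0"))
    world_size = int(os.environ.get("SLURM_NTASKS", "1"))
    # File-based rendezvous: all ranks point at the same file on a shared
    # filesystem, mirroring --dist_init_method file:///...
    dist.init_process_group(
        backend="nccl",
        init_method=f"file://{init_file}",
        rank=rank,
        world_size=world_size,
    )
    # Bind the process to one of the node's GPUs (4 per node in this run).
    torch.cuda.set_device(rank % torch.cuda.device_count())
    # Synchronize all ranks; the internal store-based barrier during init is
    # what produces the "store_based_barrier_key" INFO lines in this log.
    dist.barrier()

init_distributed("/tmp/.dist_init_example")  # hypothetical path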
+[gpua014:0/64] 2023-07-03 02:24:24,051 (s2t:483) INFO: Vocabulary size: 50002 +[gpua014:0/64] 2023-07-03 02:24:39,279 (abs_task:1201) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True +[gpua014:0/64] 2023-07-03 02:24:39,288 (abs_task:1202) INFO: Model structure: +ESPnetS2TModel( + (frontend): DefaultFrontend( + (stft): Stft(n_fft=512, win_length=400, hop_length=160, center=True, normalized=False, onesided=True) + (frontend): Frontend() + (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False) + ) + (specaug): SpecAug( + (freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq) + (time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=10, axis=time) + ) + (normalize): GlobalMVN(stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz, norm_means=True, norm_vars=True) + (encoder): TransformerEncoder( + (embed): Conv2dSubsampling( + (conv): Sequential( + (0): Conv2d(1, 1024, kernel_size=(3, 3), stride=(2, 2)) + (1): ReLU() + (2): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(2, 2)) + (3): ReLU() + ) + (out): Sequential( + (0): Linear(in_features=19456, out_features=1024, bias=True) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + ) + (encoders): MultiSequential( + (0): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, 
inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, 
bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (9): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (10): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (11): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): 
Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (12): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (13): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (14): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (15): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): 
Dropout(p=0.1, inplace=False) + ) + (16): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (17): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (18): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (19): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (20): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): 
Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (21): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (22): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (23): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + ) + (decoder): TransformerDecoder( + (embed): Sequential( + (0): Embedding(50002, 1024) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (output_layer): Linear(in_features=1024, out_features=50002, bias=True) + (decoders): MultiSequential( + (0): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + 
(linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): DecoderLayer( + (self_attn): MultiHeadedAttention( + 
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), 
eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): 
LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (9): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (10): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (11): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + 
(w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (12): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (13): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (14): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): 
Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (15): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (16): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (17): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): 
Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (18): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (19): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (20): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (21): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (22): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (23): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+    )
+  )
+  (criterion_att): LabelSmoothingLoss(
+    (criterion): KLDivLoss()
+  )
+  (ctc): CTC(
+    (ctc_lo): Linear(in_features=1024, out_features=50002, bias=True)
+    (ctc_loss): CTCLoss()
+  )
+)
+
+Model summary:
+    Class Name: ESPnetS2TModel
+    Total Number of model parameters: 888.51 M
+    Number of trainable parameters: 888.51 M (100.0%)
+    Size: 3.55 GB
+    Type: torch.float32
+[gpua014:0/64] 2023-07-03 02:24:39,288 (abs_task:1205) INFO: Optimizer:
+AdamW (
+Parameter Group 0
+    amsgrad: False
+    betas: [0.9, 0.98]
+    capturable: False
+    eps: 1e-06
+    foreach: None
+    initial_lr: 0.00025
+    lr: 2.5e-08
+    maximize: False
+    weight_decay: 0.0
+)
+[gpua014:0/64] 2023-07-03 02:24:39,288 (abs_task:1206) INFO: Scheduler: WarmupLR(warmup_steps=10000)
+[gpua014:0/64] 2023-07-03 02:24:39,290 (abs_task:1215) INFO: Saving the configuration in exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/config.yaml
+[gpua014:0/64] 2023-07-03 02:24:39,991 (abs_task:1272) INFO: Loading pretrained params from /scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v2/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e18_d18_lr5e-4_warmup20k_raw_bpe50000/valid.acc.ave.pth
+[gpua014:0/64] 2023-07-03 02:24:48,871 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-07-03 02:24:49,062 (abs_task:1570) INFO: [valid] dataset:
+ESPnetDataset(
+  speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"}
+  text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"}
+  text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"}
+  text: {"path": "dump/raw/dev/text", "type": "text"}
+  preprocess: )
+[gpua014:0/64] 2023-07-03 02:24:49,062 (abs_task:1571) INFO: [valid] Batch sampler: UnsortedBatchSampler(N-batch=1012, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape,
+[gpua014:0/64] 2023-07-03 02:24:49,071 (abs_task:1572) INFO: [valid] mini-batch sizes summary: N-batch=1012, mean=128.1, min=128, max=129
+[gpua014:0/64] 2023-07-03 02:24:49,550 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-07-03 02:24:49,856 (abs_task:1570) INFO: [plot_att] dataset:
+ESPnetDataset(
+  speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"}
+  text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"}
+  text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"}
+  text: {"path": "dump/raw/dev/text", "type": "text"}
+  preprocess: )
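A note on the optimizer block above, since the numbers are easy to misread: the AdamW group prints lr: 2.5e-08 alongside initial_lr: 0.00025 because that is simply the WarmupLR schedule evaluated at the first step. ESPnet2's WarmupLR is the Noam-style schedule, and a minimal sketch in plain Python (illustrative only, not this recipe's code) reproduces both the step-1 value and the peak:

    def warmup_lr(step, initial_lr=2.5e-4, warmup_steps=10000):
        # Noam-style warmup: ramp up to initial_lr over `warmup_steps`,
        # then decay proportionally to 1/sqrt(step).
        return initial_lr * warmup_steps**0.5 * min(step**-0.5, step * warmup_steps**-1.5)

    assert abs(warmup_lr(1) - 2.5e-08) < 1e-12      # the "lr: 2.5e-08" printed above
    assert abs(warmup_lr(10000) - 2.5e-04) < 1e-12  # peaks at initial_lr after warmup

The model summary is consistent as well: 888.51 M float32 parameters at 4 bytes each comes to roughly 3.55 GB.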
+[gpua014:0/64] 2023-07-03 02:24:49,856 (abs_task:1571) INFO: [plot_att] Batch sampler: UnsortedBatchSampler(N-batch=129591, batch_size=1, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape,
+[gpua014:0/64] 2023-07-03 02:24:49,856 (abs_task:1572) INFO: [plot_att] mini-batch sizes summary: N-batch=3, mean=1.0, min=1, max=1
+[gpua014:0/64] 2023-07-03 02:25:21,792 (trainer:159) INFO: The training was resumed using exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/checkpoint.pth
+gpua014:1504761:1504761 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.14<0>
+gpua014:1504761:1504761 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua014:1504761:1504761 [0] NCCL INFO cudaDriverVersion 12010
+NCCL version 2.14.3+cuda11.7
+[gpua014:0/64] 2023-07-03 02:25:26,795 (trainer:284) INFO: 6/100epoch started
+[gpua014:0/64] 2023-07-03 02:25:26,841 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpua014:0/64] 2023-07-03 02:25:48,338 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua014:0/64] 2023-07-03 02:25:52,384 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.9", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.9", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.9", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.9", "type": "text"}
+  preprocess: )
+[gpua014:0/64] 2023-07-03 02:25:52,384 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.9,
+[gpua014:0/64] 2023-07-03 02:25:52,392 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+gpua018:3479289:3479289 [2] NCCL INFO cudaDriverVersion 12010
+gpua018:3479289:3479289 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.18<0>
+gpua018:3479289:3479289 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua018:3479289:3479368 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.18<0>
+gpua018:3479289:3479368 [2] NCCL INFO Using network IB
+gpua018:3479289:3479368 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua018:3479289:3479368 [2] NCCL INFO Trees [0] 15/-1/-1->14->13 [1] 15/-1/-1->14->13
+gpua018:3479289:3479368 [2] NCCL INFO Channel 00/0 : 14[85000] -> 15[c7000] via P2P/IPC/read
+gpua018:3479289:3479368 [2] NCCL INFO Channel 01/0 : 14[85000] -> 15[c7000] via P2P/IPC/read
+gpua018:3479289:3479368 [2] NCCL INFO Connected all rings
+gpua018:3479289:3479368 [2] NCCL INFO Channel 00/0 : 14[85000] -> 13[46000] via P2P/IPC/read
+gpua018:3479289:3479368 [2] NCCL INFO Channel 01/0 : 14[85000] -> 13[46000] via P2P/IPC/read
+gpua018:3479289:3479368 [2] NCCL INFO Connected all trees
+gpua018:3479289:3479368 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua018:3479289:3479368 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua018:3479289:3479368 [2] NCCL INFO comm 0x50f23dd0 rank 14 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua018:3479290:3479290 [3] NCCL INFO cudaDriverVersion 12010
+gpua018:3479290:3479290 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.18<0>
+gpua018:3479290:3479290 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua018:3479290:3479367 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.18<0>
+gpua018:3479290:3479367 [3] NCCL INFO Using network IB
+gpua018:3479290:3479367 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua018:3479290:3479367 [3] NCCL INFO Trees [0] -1/-1/-1->15->14 [1] -1/-1/-1->15->14
+gpua018:3479290:3479367 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0
+gpua018:3479290:3479367 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0
+gpua018:3479290:3479367 [3] NCCL INFO Connected all rings
+gpua018:3479290:3479367 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 14[85000] via P2P/IPC/read
+gpua018:3479290:3479367 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 14[85000] via P2P/IPC/read
+gpua018:3479290:3479367 [3] NCCL INFO Connected all trees
+gpua018:3479290:3479367 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua018:3479290:3479367 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua018:3479290:3479367 [3] NCCL INFO comm 0xb9d17510 rank 15 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua018:3479288:3479288 [1] NCCL INFO cudaDriverVersion 12010
+gpua018:3479288:3479288 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.18<0>
+gpua018:3479288:3479288 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua018:3479288:3479369 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.18<0>
+gpua018:3479288:3479369 [1] NCCL INFO Using network IB
+gpua018:3479288:3479369 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua018:3479288:3479369 [1] NCCL INFO Trees [0] 14/-1/-1->13->12 [1] 14/20/-1->13->12
+gpua018:3479288:3479369 [1] NCCL INFO Channel 00/0 : 13[46000] -> 14[85000] via P2P/IPC/read
+gpua018:3479288:3479369 [1] NCCL INFO Channel 01/0 : 13[46000] -> 14[85000] via P2P/IPC/read
+gpua018:3479288:3479369 [1] NCCL INFO Connected all rings
+gpua018:3479288:3479369 [1] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [send] via NET/IB/0
+gpua018:3479288:3479369 [1] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [receive] via NET/IB/0
+gpua018:3479288:3479369 [1] NCCL INFO Channel 00/0 : 13[46000] -> 12[7000] via P2P/IPC/read
+gpua018:3479288:3479369 [1] NCCL INFO Channel 01/0 : 13[46000] -> 12[7000] via P2P/IPC/read
+gpua018:3479288:3479369 [1] NCCL INFO Connected all trees
+gpua018:3479288:3479369 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua018:3479288:3479369 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua018:3479288:3479369 [1] NCCL INFO comm 0x5176eca0 rank 13 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua022:3213423:3213423 [2] NCCL INFO cudaDriverVersion 12010
+gpua022:3213423:3213423 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.22<0>
+gpua022:3213423:3213423 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua022:3213423:3213495 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.22<0>
+gpua022:3213423:3213495 [2] NCCL INFO Using network IB
+gpua022:3213423:3213495 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua022:3213423:3213495 [2] NCCL INFO Trees [0] 27/-1/-1->26->25 [1] 27/-1/-1->26->25
+gpua022:3213423:3213495 [2] NCCL INFO Channel 00/0 : 26[85000] -> 27[c7000] via P2P/IPC/read
+gpua022:3213423:3213495 [2] NCCL INFO Channel 01/0 : 26[85000] -> 27[c7000] via P2P/IPC/read
+gpua022:3213423:3213495 [2] NCCL INFO Connected all rings
+gpua022:3213423:3213495 [2] NCCL INFO Channel 00/0 : 26[85000] -> 25[46000] via P2P/IPC/read
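For orientation in the per-rank NCCL chatter that follows: the job spans 16 nodes with 4 GPUs each, 64 ranks in total (the "nranks 64" above), and the rank printed at "Init COMPLETE" is the node's position in the allocation times four plus the local GPU index, so gpua014 holds ranks 0-3, gpua018 ranks 12-15, gpua022 ranks 24-27, and so on. A sketch of that mapping under the usual one-srun-task-per-node launch (the helper name and the use of SLURM_PROCID here are my assumptions for illustration, not this script's code):

    import os

    def global_rank(local_gpu, gpus_per_node=4):
        # One SLURM task per node; each task forks one worker per GPU, so
        # the NCCL rank is node_index * gpus_per_node + local GPU index.
        node_index = int(os.environ["SLURM_PROCID"])
        return node_index * gpus_per_node + local_gpu

    # e.g. node_index 3 (gpua018) and GPU 2 give rank 14, matching
    # "gpua018 ... [2] ... rank 14 nranks 64 ... Init COMPLETE" above.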
+gpua022:3213423:3213495 [2] NCCL INFO Channel 01/0 : 26[85000] -> 25[46000] via P2P/IPC/read +gpua022:3213423:3213495 [2] NCCL INFO Connected all trees +gpua022:3213423:3213495 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua022:3213423:3213495 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua022:3213423:3213495 [2] NCCL INFO comm 0xb5a97440 rank 26 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua018:3479287:3479287 [0] NCCL INFO cudaDriverVersion 12010 +gpua018:3479287:3479287 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.18<0> +gpua018:3479287:3479287 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua018:3479287:3479366 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.18<0> +gpua018:3479287:3479366 [0] NCCL INFO Using network IB +gpua018:3479287:3479366 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua018:3479287:3479366 [0] NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->28 +gpua018:3479287:3479366 [0] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0 +gpua018:3479287:3479366 [0] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0 +gpua018:3479287:3479366 [0] NCCL INFO Channel 00/0 : 12[7000] -> 13[46000] via P2P/IPC/read +gpua018:3479287:3479366 [0] NCCL INFO Channel 01/0 : 12[7000] -> 13[46000] via P2P/IPC/read +gpua018:3479287:3479366 [0] NCCL INFO Connected all rings +gpua018:3479287:3479366 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [receive] via NET/IB/0 +gpua018:3479287:3479366 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [receive] via NET/IB/0 +gpua018:3479287:3479366 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [send] via NET/IB/0 +gpua018:3479287:3479366 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [receive] via NET/IB/0 +gpua018:3479287:3479366 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [send] via NET/IB/0 +gpua018:3479287:3479366 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [send] via NET/IB/0 +gpua018:3479287:3479366 [0] NCCL INFO Connected all trees +gpua018:3479287:3479366 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua018:3479287:3479366 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua018:3479287:3479366 [0] NCCL INFO comm 0xa20beed0 rank 12 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua022:3213424:3213424 [3] NCCL INFO cudaDriverVersion 12010 +gpua022:3213424:3213424 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.22<0> +gpua022:3213424:3213424 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua022:3213424:3213496 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.22<0> +gpua022:3213424:3213496 [3] NCCL INFO Using network IB +gpua022:3213424:3213496 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua022:3213424:3213496 [3] NCCL INFO Trees [0] -1/-1/-1->27->26 [1] -1/-1/-1->27->26 +gpua022:3213424:3213496 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0 +gpua022:3213424:3213496 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0 +gpua022:3213424:3213496 [3] NCCL INFO Connected all rings +gpua022:3213424:3213496 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 26[85000] via P2P/IPC/read +gpua022:3213424:3213496 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 26[85000] via P2P/IPC/read +gpua022:3213424:3213496 [3] NCCL INFO Connected all trees +gpua022:3213424:3213496 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 
512 | 512 +gpua022:3213424:3213496 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua022:3213424:3213496 [3] NCCL INFO comm 0x4eeb3510 rank 27 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua041:2383593:2383593 [1] NCCL INFO cudaDriverVersion 12010 +gpua041:2383593:2383593 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.41<0> +gpua041:2383593:2383593 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua041:2383593:2383733 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.41<0> +gpua041:2383593:2383733 [1] NCCL INFO Using network IB +gpua041:2383593:2383733 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua041:2383593:2383733 [1] NCCL INFO Trees [0] 30/-1/-1->29->28 [1] 30/44/-1->29->28 +gpua041:2383593:2383733 [1] NCCL INFO Channel 00/0 : 29[46000] -> 30[85000] via P2P/IPC/read +gpua041:2383593:2383733 [1] NCCL INFO Channel 01/0 : 29[46000] -> 30[85000] via P2P/IPC/read +gpua041:2383593:2383733 [1] NCCL INFO Connected all rings +gpua041:2383593:2383733 [1] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [send] via NET/IB/0 +gpua041:2383593:2383733 [1] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [receive] via NET/IB/0 +gpua041:2383593:2383733 [1] NCCL INFO Channel 00/0 : 29[46000] -> 28[7000] via P2P/IPC/read +gpua041:2383593:2383733 [1] NCCL INFO Channel 01/0 : 29[46000] -> 28[7000] via P2P/IPC/read +gpua041:2383593:2383733 [1] NCCL INFO Connected all trees +gpua041:2383593:2383733 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua041:2383593:2383733 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua041:2383593:2383733 [1] NCCL INFO comm 0x4f62e590 rank 29 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua022:3213422:3213422 [1] NCCL INFO cudaDriverVersion 12010 +gpua022:3213422:3213422 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.22<0> +gpua022:3213422:3213422 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua022:3213422:3213494 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.22<0> +gpua022:3213422:3213494 [1] NCCL INFO Using network IB +gpua022:3213422:3213494 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua022:3213422:3213494 [1] NCCL INFO Trees [0] 26/20/-1->25->24 [1] 26/-1/-1->25->24 +gpua022:3213422:3213494 [1] NCCL INFO Channel 00/0 : 25[46000] -> 26[85000] via P2P/IPC/read +gpua022:3213422:3213494 [1] NCCL INFO Channel 01/0 : 25[46000] -> 26[85000] via P2P/IPC/read +gpua022:3213422:3213494 [1] NCCL INFO Connected all rings +gpua022:3213422:3213494 [1] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [receive] via NET/IB/0 +gpua022:3213422:3213494 [1] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [send] via NET/IB/0 +gpua022:3213422:3213494 [1] NCCL INFO Channel 00/0 : 25[46000] -> 24[7000] via P2P/IPC/read +gpua022:3213422:3213494 [1] NCCL INFO Channel 01/0 : 25[46000] -> 24[7000] via P2P/IPC/read +gpua022:3213422:3213494 [1] NCCL INFO Connected all trees +gpua022:3213422:3213494 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua022:3213422:3213494 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua022:3213422:3213494 [1] NCCL INFO comm 0xba8d1d30 rank 25 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua063:1316628:1316628 [3] NCCL INFO cudaDriverVersion 12010 +gpua063:1316628:1316628 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.63<0> +gpua063:1316628:1316628 [3] NCCL INFO NET/Plugin : No plugin 
found (libnccl-net.so), using internal implementation +gpua063:1316628:1316705 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.63<0> +gpua063:1316628:1316705 [3] NCCL INFO Using network IB +gpua063:1316628:1316705 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua063:1316628:1316705 [3] NCCL INFO Trees [0] -1/-1/-1->43->42 [1] -1/-1/-1->43->42 +gpua063:1316628:1316705 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0 +gpua063:1316628:1316705 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0 +gpua063:1316628:1316705 [3] NCCL INFO Connected all rings +gpua063:1316628:1316705 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 42[85000] via P2P/IPC/read +gpua063:1316628:1316705 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 42[85000] via P2P/IPC/read +gpua063:1316628:1316705 [3] NCCL INFO Connected all trees +gpua063:1316628:1316705 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua063:1316628:1316705 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua063:1316628:1316705 [3] NCCL INFO comm 0x50f53420 rank 43 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua091:1092313:1092313 [2] NCCL INFO cudaDriverVersion 12010 +gpua091:1092313:1092313 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.91<0> +gpua091:1092313:1092313 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua091:1092313:1092405 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.91<0> +gpua091:1092313:1092405 [2] NCCL INFO Using network IB +gpua091:1092313:1092405 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua091:1092313:1092405 [2] NCCL INFO Trees [0] 55/-1/-1->54->53 [1] 55/-1/-1->54->53 +gpua091:1092313:1092405 [2] NCCL INFO Channel 00/0 : 54[85000] -> 55[c7000] via P2P/IPC/read +gpua091:1092313:1092405 [2] NCCL INFO Channel 01/0 : 54[85000] -> 55[c7000] via P2P/IPC/read +gpua091:1092313:1092405 [2] NCCL INFO Connected all rings +gpua091:1092313:1092405 [2] NCCL INFO Channel 00/0 : 54[85000] -> 53[46000] via P2P/IPC/read +gpua091:1092313:1092405 [2] NCCL INFO Channel 01/0 : 54[85000] -> 53[46000] via P2P/IPC/read +gpua091:1092313:1092405 [2] NCCL INFO Connected all trees +gpua091:1092313:1092405 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua091:1092313:1092405 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua091:1092313:1092405 [2] NCCL INFO comm 0xb9112ed0 rank 54 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua063:1316626:1316626 [1] NCCL INFO cudaDriverVersion 12010 +gpua063:1316626:1316626 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.63<0> +gpua063:1316626:1316626 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua063:1316626:1316707 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.63<0> +gpua063:1316626:1316707 [1] NCCL INFO Using network IB +gpua063:1316626:1316707 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua063:1316626:1316707 [1] NCCL INFO Trees [0] 42/36/-1->41->40 [1] 42/-1/-1->41->40 +gpua063:1316626:1316707 [1] NCCL INFO Channel 00/0 : 41[46000] -> 42[85000] via P2P/IPC/read +gpua063:1316626:1316707 [1] NCCL INFO Channel 01/0 : 41[46000] -> 42[85000] via P2P/IPC/read +gpua063:1316626:1316707 [1] NCCL INFO Connected all rings +gpua063:1316626:1316707 [1] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [receive] via NET/IB/0 +gpua063:1316626:1316707 [1] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [send] via 
NET/IB/0 +gpua063:1316626:1316707 [1] NCCL INFO Channel 00/0 : 41[46000] -> 40[7000] via P2P/IPC/read +gpua063:1316626:1316707 [1] NCCL INFO Channel 01/0 : 41[46000] -> 40[7000] via P2P/IPC/read +gpua063:1316626:1316707 [1] NCCL INFO Connected all trees +gpua063:1316626:1316707 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua063:1316626:1316707 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua063:1316626:1316707 [1] NCCL INFO comm 0x50a76db0 rank 41 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua093:1851602:1851602 [2] NCCL INFO cudaDriverVersion 12010 +gpua093:1851602:1851602 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.93<0> +gpua093:1851602:1851602 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua093:1851602:1851687 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.93<0> +gpua093:1851602:1851687 [2] NCCL INFO Using network IB +gpua093:1851602:1851687 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua093:1851602:1851687 [2] NCCL INFO Trees [0] 59/-1/-1->58->57 [1] 59/-1/-1->58->57 +gpua093:1851602:1851687 [2] NCCL INFO Channel 00/0 : 58[85000] -> 59[c7000] via P2P/IPC/read +gpua093:1851602:1851687 [2] NCCL INFO Channel 01/0 : 58[85000] -> 59[c7000] via P2P/IPC/read +gpua093:1851602:1851687 [2] NCCL INFO Connected all rings +gpua093:1851602:1851687 [2] NCCL INFO Channel 00/0 : 58[85000] -> 57[46000] via P2P/IPC/read +gpua093:1851602:1851687 [2] NCCL INFO Channel 01/0 : 58[85000] -> 57[46000] via P2P/IPC/read +gpua093:1851602:1851687 [2] NCCL INFO Connected all trees +gpua093:1851602:1851687 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua093:1851602:1851687 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua093:1851602:1851687 [2] NCCL INFO comm 0x8eeaee30 rank 58 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua093:1851600:1851600 [0] NCCL INFO cudaDriverVersion 12010 +gpua093:1851600:1851600 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.93<0> +gpua093:1851600:1851600 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua093:1851600:1851684 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.93<0> +gpua093:1851600:1851684 [0] NCCL INFO Using network IB +gpua093:1851600:1851684 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua093:1851600:1851684 [0] NCCL INFO Trees [0] 57/60/-1->56->48 [1] 57/-1/-1->56->53 +gpua093:1851600:1851684 [0] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0 +gpua093:1851600:1851684 [0] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0 +gpua093:1851600:1851684 [0] NCCL INFO Channel 00/0 : 56[7000] -> 57[46000] via P2P/IPC/read +gpua093:1851600:1851684 [0] NCCL INFO Channel 01/0 : 56[7000] -> 57[46000] via P2P/IPC/read +gpua093:1851600:1851684 [0] NCCL INFO Connected all rings +gpua093:1851600:1851684 [0] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [receive] via NET/IB/0 +gpua093:1851600:1851684 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [send] via NET/IB/0 +gpua093:1851600:1851684 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [receive] via NET/IB/0 +gpua093:1851600:1851684 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [send] via NET/IB/0 +gpua093:1851600:1851684 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [receive] via NET/IB/0 +gpua093:1851600:1851684 [0] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [send] via NET/IB/0 +gpua093:1851600:1851684 
[0] NCCL INFO Connected all trees +gpua093:1851600:1851684 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua093:1851600:1851684 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua093:1851600:1851684 [0] NCCL INFO comm 0x9c356d50 rank 56 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua096:2182299:2182299 [1] NCCL INFO cudaDriverVersion 12010 +gpua096:2182299:2182299 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.96<0> +gpua096:2182299:2182299 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua096:2182299:2182385 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.96<0> +gpua096:2182299:2182385 [1] NCCL INFO Using network IB +gpua096:2182299:2182385 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua096:2182299:2182385 [1] NCCL INFO Trees [0] 62/-1/-1->61->60 [1] 62/-1/-1->61->60 +gpua096:2182299:2182385 [1] NCCL INFO Channel 00/0 : 61[46000] -> 62[85000] via P2P/IPC/read +gpua096:2182299:2182385 [1] NCCL INFO Channel 01/0 : 61[46000] -> 62[85000] via P2P/IPC/read +gpua096:2182299:2182385 [1] NCCL INFO Connected all rings +gpua096:2182299:2182385 [1] NCCL INFO Channel 00/0 : 61[46000] -> 60[7000] via P2P/IPC/read +gpua096:2182299:2182385 [1] NCCL INFO Channel 01/0 : 61[46000] -> 60[7000] via P2P/IPC/read +gpua096:2182299:2182385 [1] NCCL INFO Connected all trees +gpua096:2182299:2182385 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua096:2182299:2182385 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua096:2182299:2182385 [1] NCCL INFO comm 0x50cf5510 rank 61 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua063:1316627:1316627 [2] NCCL INFO cudaDriverVersion 12010 +gpua063:1316627:1316627 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.63<0> +gpua063:1316627:1316627 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua063:1316627:1316706 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.63<0> +gpua063:1316627:1316706 [2] NCCL INFO Using network IB +gpua063:1316627:1316706 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua063:1316627:1316706 [2] NCCL INFO Trees [0] 43/-1/-1->42->41 [1] 43/-1/-1->42->41 +gpua063:1316627:1316706 [2] NCCL INFO Channel 00/0 : 42[85000] -> 43[c7000] via P2P/IPC/read +gpua063:1316627:1316706 [2] NCCL INFO Channel 01/0 : 42[85000] -> 43[c7000] via P2P/IPC/read +gpua063:1316627:1316706 [2] NCCL INFO Connected all rings +gpua063:1316627:1316706 [2] NCCL INFO Channel 00/0 : 42[85000] -> 41[46000] via P2P/IPC/read +gpua063:1316627:1316706 [2] NCCL INFO Channel 01/0 : 42[85000] -> 41[46000] via P2P/IPC/read +gpua063:1316627:1316706 [2] NCCL INFO Connected all trees +gpua063:1316627:1316706 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua063:1316627:1316706 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua063:1316627:1316706 [2] NCCL INFO comm 0x50e1a4d0 rank 42 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua014:1504764:1504764 [3] NCCL INFO cudaDriverVersion 12010 +gpua014:1504764:1504764 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.14<0> +gpua014:1504764:1504764 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua014:1504764:1504833 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.14<0> +gpua014:1504764:1504833 [3] NCCL INFO Using network IB +gpua014:1504764:1504833 [3] NCCL INFO Setting affinity for GPU 3 to ffff 
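The "Trees" lines encode NCCL's double binary tree, one entry per channel: up to three child ranks, then the rank itself, then its parent, with -1 marking an empty slot. A rough parser for the printed format (my reading of the log lines, not an NCCL API):

    import re

    # "Trees [0] 15/-1/-1->14->13 [1] 15/-1/-1->14->13" -> per channel:
    # the children before "->", then the rank, then its parent (-1 = none).
    _TREE = re.compile(r"\[(\d+)\] (-?\d+)/(-?\d+)/(-?\d+)->(\d+)->(-?\d+)")

    def parse_trees(line):
        return [
            {
                "channel": int(ch),
                "children": [int(x) for x in (c0, c1, c2) if int(x) >= 0],
                "rank": int(rank),
                "parent": int(parent),
            }
            for ch, c0, c1, c2, rank, parent in _TREE.findall(line)
        ]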
+gpua014:1504764:1504833 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2 +gpua014:1504764:1504833 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0 +gpua014:1504764:1504833 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0 +gpua014:1504764:1504833 [3] NCCL INFO Connected all rings +gpua014:1504764:1504833 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 2[85000] via P2P/IPC/read +gpua014:1504764:1504833 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 2[85000] via P2P/IPC/read +gpua014:1504764:1504833 [3] NCCL INFO Connected all trees +gpua014:1504764:1504833 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua014:1504764:1504833 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua014:1504764:1504833 [3] NCCL INFO comm 0x90a5180 rank 3 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua091:1092311:1092311 [0] NCCL INFO cudaDriverVersion 12010 +gpua091:1092311:1092311 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.91<0> +gpua091:1092311:1092311 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua091:1092311:1092403 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.91<0> +gpua091:1092311:1092403 [0] NCCL INFO Using network IB +gpua091:1092311:1092403 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua091:1092311:1092403 [0] NCCL INFO Trees [0] 53/-1/-1->52->57 [1] 53/48/-1->52->45 +gpua091:1092311:1092403 [0] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0 +gpua091:1092311:1092403 [0] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0 +gpua091:1092311:1092403 [0] NCCL INFO Channel 00/0 : 52[7000] -> 53[46000] via P2P/IPC/read +gpua091:1092311:1092403 [0] NCCL INFO Channel 01/0 : 52[7000] -> 53[46000] via P2P/IPC/read +gpua091:1092311:1092403 [0] NCCL INFO Connected all rings +gpua091:1092311:1092403 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [receive] via NET/IB/0 +gpua091:1092311:1092403 [0] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [send] via NET/IB/0 +gpua091:1092311:1092403 [0] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [receive] via NET/IB/0 +gpua091:1092311:1092403 [0] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [send] via NET/IB/0 +gpua091:1092311:1092403 [0] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [receive] via NET/IB/0 +gpua091:1092311:1092403 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [send] via NET/IB/0 +gpua091:1092311:1092403 [0] NCCL INFO Connected all trees +gpua091:1092311:1092403 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua091:1092311:1092403 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua091:1092311:1092403 [0] NCCL INFO comm 0xb51c2df0 rank 52 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua021:3546921:3546921 [1] NCCL INFO cudaDriverVersion 12010 +gpua021:3546921:3546921 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.21<0> +gpua021:3546921:3546921 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua021:3546921:3547001 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.21<0> +gpua021:3546921:3547001 [1] NCCL INFO Using network IB +gpua021:3546921:3547001 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua021:3546921:3547001 [1] NCCL INFO Trees [0] 22/-1/-1->21->20 [1] 22/24/-1->21->20 +gpua021:3546921:3547001 [1] NCCL INFO Channel 00/0 : 21[46000] -> 22[85000] via P2P/IPC/read +gpua021:3546921:3547001 [1] NCCL INFO 
Channel 01/0 : 21[46000] -> 22[85000] via P2P/IPC/read +gpua021:3546921:3547001 [1] NCCL INFO Connected all rings +gpua021:3546921:3547001 [1] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [send] via NET/IB/0 +gpua021:3546921:3547001 [1] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [receive] via NET/IB/0 +gpua021:3546921:3547001 [1] NCCL INFO Channel 00/0 : 21[46000] -> 20[7000] via P2P/IPC/read +gpua021:3546921:3547001 [1] NCCL INFO Channel 01/0 : 21[46000] -> 20[7000] via P2P/IPC/read +gpua021:3546921:3547001 [1] NCCL INFO Connected all trees +gpua021:3546921:3547001 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua021:3546921:3547001 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua021:3546921:3547001 [1] NCCL INFO comm 0xb47c80d0 rank 21 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua016:2146304:2146304 [0] NCCL INFO cudaDriverVersion 12010 +gpua016:2146304:2146304 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.16<0> +gpua016:2146304:2146304 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua016:2146304:2146395 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.16<0> +gpua016:2146304:2146395 [0] NCCL INFO Using network IB +gpua016:2146304:2146395 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua016:2146304:2146395 [0] NCCL INFO Trees [0] 9/12/-1->8->17 [1] 9/-1/-1->8->5 +gpua016:2146304:2146395 [0] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0 +gpua016:2146304:2146395 [0] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0 +gpua016:2146304:2146395 [0] NCCL INFO Channel 00/0 : 8[7000] -> 9[46000] via P2P/IPC/read +gpua016:2146304:2146395 [0] NCCL INFO Channel 01/0 : 8[7000] -> 9[46000] via P2P/IPC/read +gpua016:2146304:2146395 [0] NCCL INFO Connected all rings +gpua016:2146304:2146395 [0] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [receive] via NET/IB/0 +gpua016:2146304:2146395 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [send] via NET/IB/0 +gpua016:2146304:2146395 [0] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [send] via NET/IB/0 +gpua016:2146304:2146395 [0] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [receive] via NET/IB/0 +gpua016:2146304:2146395 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [receive] via NET/IB/0 +gpua016:2146304:2146395 [0] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [send] via NET/IB/0 +gpua016:2146304:2146395 [0] NCCL INFO Connected all trees +gpua016:2146304:2146395 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua016:2146304:2146395 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua016:2146304:2146395 [0] NCCL INFO comm 0x94649e0 rank 8 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua016:2146305:2146305 [1] NCCL INFO cudaDriverVersion 12010 +gpua016:2146305:2146305 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.16<0> +gpua016:2146305:2146305 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua016:2146305:2146397 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.16<0> +gpua016:2146305:2146397 [1] NCCL INFO Using network IB +gpua016:2146305:2146397 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua016:2146305:2146397 [1] NCCL INFO Trees [0] 10/4/-1->9->8 [1] 10/-1/-1->9->8 +gpua016:2146305:2146397 [1] NCCL INFO Channel 00/0 : 9[46000] -> 10[85000] via P2P/IPC/read +gpua016:2146305:2146397 [1] NCCL INFO Channel 01/0 : 9[46000] -> 10[85000] via 
P2P/IPC/read +gpua016:2146305:2146397 [1] NCCL INFO Connected all rings +gpua016:2146305:2146397 [1] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [receive] via NET/IB/0 +gpua016:2146305:2146397 [1] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [send] via NET/IB/0 +gpua016:2146305:2146397 [1] NCCL INFO Channel 00/0 : 9[46000] -> 8[7000] via P2P/IPC/read +gpua016:2146305:2146397 [1] NCCL INFO Channel 01/0 : 9[46000] -> 8[7000] via P2P/IPC/read +gpua016:2146305:2146397 [1] NCCL INFO Connected all trees +gpua016:2146305:2146397 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua016:2146305:2146397 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua016:2146305:2146397 [1] NCCL INFO comm 0x505b7da0 rank 9 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua022:3213421:3213421 [0] NCCL INFO cudaDriverVersion 12010 +gpua022:3213421:3213421 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.22<0> +gpua022:3213421:3213421 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua022:3213421:3213497 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.22<0> +gpua022:3213421:3213497 [0] NCCL INFO Using network IB +gpua022:3213421:3213497 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua022:3213421:3213497 [0] NCCL INFO Trees [0] 25/28/-1->24->16 [1] 25/-1/-1->24->21 +gpua022:3213421:3213497 [0] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0 +gpua022:3213421:3213497 [0] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0 +gpua022:3213421:3213497 [0] NCCL INFO Channel 00/0 : 24[7000] -> 25[46000] via P2P/IPC/read +gpua022:3213421:3213497 [0] NCCL INFO Channel 01/0 : 24[7000] -> 25[46000] via P2P/IPC/read +gpua022:3213421:3213497 [0] NCCL INFO Connected all rings +gpua022:3213421:3213497 [0] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [receive] via NET/IB/0 +gpua022:3213421:3213497 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [send] via NET/IB/0 +gpua022:3213421:3213497 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [receive] via NET/IB/0 +gpua022:3213421:3213497 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [send] via NET/IB/0 +gpua022:3213421:3213497 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [receive] via NET/IB/0 +gpua022:3213421:3213497 [0] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [send] via NET/IB/0 +gpua022:3213421:3213497 [0] NCCL INFO Connected all trees +gpua022:3213421:3213497 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua022:3213421:3213497 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua022:3213421:3213497 [0] NCCL INFO comm 0x5127a110 rank 24 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua020:3382569:3382569 [3] NCCL INFO cudaDriverVersion 12010 +gpua020:3382569:3382569 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.20<0> +gpua020:3382569:3382569 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua020:3382569:3382644 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.20<0> +gpua020:3382569:3382644 [3] NCCL INFO Using network IB +gpua020:3382569:3382644 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua020:3382569:3382644 [3] NCCL INFO Trees [0] -1/-1/-1->19->18 [1] -1/-1/-1->19->18 +gpua020:3382569:3382644 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0 +gpua020:3382569:3382644 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0 +gpua020:3382569:3382644 
[3] NCCL INFO Connected all rings +gpua020:3382569:3382644 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 18[85000] via P2P/IPC/read +gpua020:3382569:3382644 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 18[85000] via P2P/IPC/read +gpua020:3382569:3382644 [3] NCCL INFO Connected all trees +gpua020:3382569:3382644 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua020:3382569:3382644 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua020:3382569:3382644 [3] NCCL INFO comm 0x50adbf90 rank 19 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua021:3546922:3546922 [2] NCCL INFO cudaDriverVersion 12010 +gpua021:3546922:3546922 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.21<0> +gpua021:3546922:3546922 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua021:3546922:3547000 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.21<0> +gpua021:3546922:3547000 [2] NCCL INFO Using network IB +gpua021:3546922:3547000 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua021:3546922:3547000 [2] NCCL INFO Trees [0] 23/-1/-1->22->21 [1] 23/-1/-1->22->21 +gpua021:3546922:3547000 [2] NCCL INFO Channel 00/0 : 22[85000] -> 23[c7000] via P2P/IPC/read +gpua021:3546922:3547000 [2] NCCL INFO Channel 01/0 : 22[85000] -> 23[c7000] via P2P/IPC/read +gpua021:3546922:3547000 [2] NCCL INFO Connected all rings +gpua021:3546922:3547000 [2] NCCL INFO Channel 00/0 : 22[85000] -> 21[46000] via P2P/IPC/read +gpua021:3546922:3547000 [2] NCCL INFO Channel 01/0 : 22[85000] -> 21[46000] via P2P/IPC/read +gpua021:3546922:3547000 [2] NCCL INFO Connected all trees +gpua021:3546922:3547000 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua021:3546922:3547000 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua021:3546922:3547000 [2] NCCL INFO comm 0xb64560d0 rank 22 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua041:2383592:2383592 [0] NCCL INFO cudaDriverVersion 12010 +gpua041:2383592:2383592 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.41<0> +gpua041:2383592:2383592 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua041:2383592:2383731 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.41<0> +gpua041:2383592:2383731 [0] NCCL INFO Using network IB +gpua041:2383592:2383731 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua041:2383592:2383731 [0] NCCL INFO Trees [0] 29/-1/-1->28->24 [1] 29/12/-1->28->60 +gpua041:2383592:2383731 [0] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0 +gpua041:2383592:2383731 [0] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0 +gpua041:2383592:2383731 [0] NCCL INFO Channel 00/0 : 28[7000] -> 29[46000] via P2P/IPC/read +gpua041:2383592:2383731 [0] NCCL INFO Channel 01/0 : 28[7000] -> 29[46000] via P2P/IPC/read +gpua041:2383592:2383731 [0] NCCL INFO Connected all rings +gpua041:2383592:2383731 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [receive] via NET/IB/0 +gpua041:2383592:2383731 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [receive] via NET/IB/0 +gpua041:2383592:2383731 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [receive] via NET/IB/0 +gpua041:2383592:2383731 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [send] via NET/IB/0 +gpua041:2383592:2383731 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [send] via NET/IB/0 +gpua041:2383592:2383731 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [send] via NET/IB/0 
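Each "Init COMPLETE" above marks one worker finishing communicator setup. Stripped of ESPnet's plumbing, the per-worker step is the standard PyTorch one, roughly as below (a sketch assuming an env:// rendezvous with MASTER_ADDR/MASTER_PORT exported; the actual run is wired up internally by the espnet2 trainer):

    import torch
    import torch.distributed as dist

    def join_communicator(rank, world_size=64):
        # Pin this worker to its GPU (4 per node), then open the NCCL comm;
        # init blocks until all 64 ranks have connected rings and trees.
        torch.cuda.set_device(rank % 4)
        dist.init_process_group(backend="nccl", rank=rank, world_size=world_size)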
+gpua041:2383592:2383731 [0] NCCL INFO Connected all trees +gpua041:2383592:2383731 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua041:2383592:2383731 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua041:2383592:2383731 [0] NCCL INFO comm 0x500f1b90 rank 28 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua015:2678599:2678599 [1] NCCL INFO cudaDriverVersion 12010 +gpua015:2678599:2678599 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.15<0> +gpua015:2678599:2678599 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua015:2678599:2678672 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.15<0> +gpua015:2678599:2678672 [1] NCCL INFO Using network IB +gpua015:2678599:2678672 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua015:2678599:2678672 [1] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 6/8/-1->5->4 +gpua015:2678599:2678672 [1] NCCL INFO Channel 00/0 : 5[46000] -> 6[85000] via P2P/IPC/read +gpua015:2678599:2678672 [1] NCCL INFO Channel 01/0 : 5[46000] -> 6[85000] via P2P/IPC/read +gpua015:2678599:2678672 [1] NCCL INFO Connected all rings +gpua015:2678599:2678672 [1] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [send] via NET/IB/0 +gpua015:2678599:2678672 [1] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [receive] via NET/IB/0 +gpua015:2678599:2678672 [1] NCCL INFO Channel 00/0 : 5[46000] -> 4[7000] via P2P/IPC/read +gpua015:2678599:2678672 [1] NCCL INFO Channel 01/0 : 5[46000] -> 4[7000] via P2P/IPC/read +gpua015:2678599:2678672 [1] NCCL INFO Connected all trees +gpua015:2678599:2678672 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua015:2678599:2678672 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua015:2678599:2678672 [1] NCCL INFO comm 0xb6725330 rank 5 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua088:4022850:4022850 [1] NCCL INFO cudaDriverVersion 12010 +gpua088:4022850:4022850 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.88<0> +gpua088:4022850:4022850 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua088:4022850:4022934 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.88<0> +gpua088:4022850:4022934 [1] NCCL INFO Using network IB +gpua088:4022850:4022934 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua088:4022850:4022934 [1] NCCL INFO Trees [0] 50/40/-1->49->48 [1] 50/-1/-1->49->48 +gpua088:4022850:4022934 [1] NCCL INFO Channel 00/0 : 49[46000] -> 50[85000] via P2P/IPC/read +gpua088:4022850:4022934 [1] NCCL INFO Channel 01/0 : 49[46000] -> 50[85000] via P2P/IPC/read +gpua088:4022850:4022934 [1] NCCL INFO Connected all rings +gpua088:4022850:4022934 [1] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [receive] via NET/IB/0 +gpua088:4022850:4022934 [1] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [send] via NET/IB/0 +gpua062:3999115:3999115 [0] NCCL INFO cudaDriverVersion 12010 +gpua062:3999115:3999115 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.62<0> +gpua062:3999115:3999115 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua062:3999115:3999187 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.62<0> +gpua062:3999115:3999187 [0] NCCL INFO Using network IB +gpua062:3999115:3999187 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua062:3999115:3999187 [0] NCCL INFO Trees [0] 37/-1/-1->36->41 [1] 37/32/-1->36->44 +gpua062:3999115:3999187 [0] NCCL INFO 
Channel 00/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0 +gpua062:3999115:3999187 [0] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0 +gpua062:3999115:3999187 [0] NCCL INFO Channel 00/0 : 36[7000] -> 37[46000] via P2P/IPC/read +gpua062:3999115:3999187 [0] NCCL INFO Channel 01/0 : 36[7000] -> 37[46000] via P2P/IPC/read +gpua062:3999115:3999187 [0] NCCL INFO Connected all rings +gpua088:4022850:4022934 [1] NCCL INFO Channel 00/0 : 49[46000] -> 48[7000] via P2P/IPC/read +gpua088:4022850:4022934 [1] NCCL INFO Channel 01/0 : 49[46000] -> 48[7000] via P2P/IPC/read +gpua088:4022850:4022934 [1] NCCL INFO Connected all trees +gpua088:4022850:4022934 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua088:4022850:4022934 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua088:4022850:4022934 [1] NCCL INFO comm 0xa543f510 rank 49 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua062:3999115:3999187 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [receive] via NET/IB/0 +gpua062:3999115:3999187 [0] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [send] via NET/IB/0 +gpua062:3999115:3999187 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [send] via NET/IB/0 +gpua062:3999115:3999187 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [receive] via NET/IB/0 +gpua062:3999115:3999187 [0] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [receive] via NET/IB/0 +gpua062:3999115:3999187 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [send] via NET/IB/0 +gpua062:3999115:3999187 [0] NCCL INFO Connected all trees +gpua062:3999115:3999187 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua062:3999115:3999187 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua062:3999115:3999187 [0] NCCL INFO comm 0x4f5279e0 rank 36 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua021:3546923:3546923 [3] NCCL INFO cudaDriverVersion 12010 +gpua021:3546923:3546923 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.21<0> +gpua021:3546923:3546923 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua021:3546923:3547002 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.21<0> +gpua021:3546923:3547002 [3] NCCL INFO Using network IB +gpua021:3546923:3547002 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua021:3546923:3547002 [3] NCCL INFO Trees [0] -1/-1/-1->23->22 [1] -1/-1/-1->23->22 +gpua021:3546923:3547002 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0 +gpua021:3546923:3547002 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0 +gpua021:3546923:3547002 [3] NCCL INFO Connected all rings +gpua021:3546923:3547002 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 22[85000] via P2P/IPC/read +gpua021:3546923:3547002 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 22[85000] via P2P/IPC/read +gpua021:3546923:3547002 [3] NCCL INFO Connected all trees +gpua021:3546923:3547002 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua021:3546923:3547002 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua021:3546923:3547002 [3] NCCL INFO comm 0x51142940 rank 23 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua091:1092312:1092312 [1] NCCL INFO cudaDriverVersion 12010 +gpua091:1092312:1092312 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.91<0> +gpua091:1092312:1092312 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua091:1092312:1092406 [1] NCCL INFO NET/IB : Using 
[0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.91<0> +gpua091:1092312:1092406 [1] NCCL INFO Using network IB +gpua091:1092312:1092406 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua091:1092312:1092406 [1] NCCL INFO Trees [0] 54/-1/-1->53->52 [1] 54/56/-1->53->52 +gpua091:1092312:1092406 [1] NCCL INFO Channel 00/0 : 53[46000] -> 54[85000] via P2P/IPC/read +gpua091:1092312:1092406 [1] NCCL INFO Channel 01/0 : 53[46000] -> 54[85000] via P2P/IPC/read +gpua091:1092312:1092406 [1] NCCL INFO Connected all rings +gpua091:1092312:1092406 [1] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [send] via NET/IB/0 +gpua091:1092312:1092406 [1] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [receive] via NET/IB/0 +gpua091:1092312:1092406 [1] NCCL INFO Channel 00/0 : 53[46000] -> 52[7000] via P2P/IPC/read +gpua091:1092312:1092406 [1] NCCL INFO Channel 01/0 : 53[46000] -> 52[7000] via P2P/IPC/read +gpua091:1092312:1092406 [1] NCCL INFO Connected all trees +gpua091:1092312:1092406 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua091:1092312:1092406 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua091:1092312:1092406 [1] NCCL INFO comm 0xb15c78d0 rank 53 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua041:2383595:2383595 [3] NCCL INFO cudaDriverVersion 12010 +gpua041:2383595:2383595 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.41<0> +gpua041:2383595:2383595 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua041:2383595:2383732 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.41<0> +gpua041:2383595:2383732 [3] NCCL INFO Using network IB +gpua041:2383595:2383732 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua041:2383595:2383732 [3] NCCL INFO Trees [0] -1/-1/-1->31->30 [1] -1/-1/-1->31->30 +gpua041:2383595:2383732 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0 +gpua041:2383595:2383732 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0 +gpua041:2383595:2383732 [3] NCCL INFO Connected all rings +gpua041:2383595:2383732 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 30[85000] via P2P/IPC/read +gpua041:2383595:2383732 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 30[85000] via P2P/IPC/read +gpua041:2383595:2383732 [3] NCCL INFO Connected all trees +gpua041:2383595:2383732 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua041:2383595:2383732 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua041:2383595:2383732 [3] NCCL INFO comm 0x50d6c2c0 rank 31 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua014:1504763:1504763 [2] NCCL INFO cudaDriverVersion 12010 +gpua014:1504763:1504763 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.14<0> +gpua014:1504763:1504763 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua014:1504763:1504834 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.14<0> +gpua014:1504763:1504834 [2] NCCL INFO Using network IB +gpua014:1504763:1504834 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua014:1504763:1504834 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1 +gpua014:1504763:1504834 [2] NCCL INFO Channel 00/0 : 2[85000] -> 3[c7000] via P2P/IPC/read +gpua014:1504763:1504834 [2] NCCL INFO Channel 01/0 : 2[85000] -> 3[c7000] via P2P/IPC/read +gpua014:1504763:1504834 [2] NCCL INFO Connected all rings +gpua014:1504763:1504834 [2] NCCL INFO Channel 00/0 : 2[85000] -> 1[46000] via P2P/IPC/read +gpua014:1504763:1504834 
[2] NCCL INFO Channel 01/0 : 2[85000] -> 1[46000] via P2P/IPC/read +gpua014:1504763:1504834 [2] NCCL INFO Connected all trees +gpua014:1504763:1504834 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua014:1504763:1504834 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua014:1504763:1504834 [2] NCCL INFO comm 0x8d97bae0 rank 2 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua020:3382567:3382567 [1] NCCL INFO cudaDriverVersion 12010 +gpua020:3382567:3382567 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.20<0> +gpua020:3382567:3382567 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua020:3382567:3382643 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.20<0> +gpua020:3382567:3382643 [1] NCCL INFO Using network IB +gpua020:3382567:3382643 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua020:3382567:3382643 [1] NCCL INFO Trees [0] 18/8/-1->17->16 [1] 18/-1/-1->17->16 +gpua020:3382567:3382643 [1] NCCL INFO Channel 00/0 : 17[46000] -> 18[85000] via P2P/IPC/read +gpua020:3382567:3382643 [1] NCCL INFO Channel 01/0 : 17[46000] -> 18[85000] via P2P/IPC/read +gpua020:3382567:3382643 [1] NCCL INFO Connected all rings +gpua020:3382567:3382643 [1] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [receive] via NET/IB/0 +gpua020:3382567:3382643 [1] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [send] via NET/IB/0 +gpua020:3382567:3382643 [1] NCCL INFO Channel 00/0 : 17[46000] -> 16[7000] via P2P/IPC/read +gpua020:3382567:3382643 [1] NCCL INFO Channel 01/0 : 17[46000] -> 16[7000] via P2P/IPC/read +gpua020:3382567:3382643 [1] NCCL INFO Connected all trees +gpua020:3382567:3382643 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua020:3382567:3382643 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua020:3382567:3382643 [1] NCCL INFO comm 0x50206940 rank 17 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua062:3999116:3999116 [1] NCCL INFO cudaDriverVersion 12010 +gpua062:3999116:3999116 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.62<0> +gpua062:3999116:3999116 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua062:3999116:3999189 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.62<0> +gpua062:3999116:3999189 [1] NCCL INFO Using network IB +gpua062:3999116:3999189 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua062:3999116:3999189 [1] NCCL INFO Trees [0] 38/-1/-1->37->36 [1] 38/40/-1->37->36 +gpua062:3999116:3999189 [1] NCCL INFO Channel 00/0 : 37[46000] -> 38[85000] via P2P/IPC/read +gpua062:3999116:3999189 [1] NCCL INFO Channel 01/0 : 37[46000] -> 38[85000] via P2P/IPC/read +gpua062:3999116:3999189 [1] NCCL INFO Connected all rings +gpua062:3999116:3999189 [1] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [send] via NET/IB/0 +gpua062:3999116:3999189 [1] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [receive] via NET/IB/0 +gpua062:3999116:3999189 [1] NCCL INFO Channel 00/0 : 37[46000] -> 36[7000] via P2P/IPC/read +gpua062:3999116:3999189 [1] NCCL INFO Channel 01/0 : 37[46000] -> 36[7000] via P2P/IPC/read +gpua062:3999116:3999189 [1] NCCL INFO Connected all trees +gpua062:3999116:3999189 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua062:3999116:3999189 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua062:3999116:3999189 [1] NCCL INFO comm 0x8302c90 rank 37 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE 
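Two transport types appear in the channel lines: "P2P/IPC/read" between GPUs on the same node, and "NET/IB" (RoCE over mlx5_0) for hops between nodes. Whether the direct peer-to-peer path exists between local GPUs can be probed from PyTorch (a quick standalone check, unrelated to this recipe's scripts):

    import torch

    # List GPU pairs on this node that support the peer-to-peer access
    # NCCL reports as "via P2P/IPC" in the log above.
    n = torch.cuda.device_count()
    for i in range(n):
        for j in range(n):
            if i != j and torch.cuda.can_device_access_peer(i, j):
                print(f"GPU {i} -> GPU {j}: P2P capable")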
+gpua015:2678598:2678598 [0] NCCL INFO cudaDriverVersion 12010 +gpua015:2678598:2678598 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.15<0> +gpua015:2678598:2678598 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua015:2678598:2678673 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.15<0> +gpua015:2678598:2678673 [0] NCCL INFO Using network IB +gpua015:2678598:2678673 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua015:2678598:2678673 [0] NCCL INFO Trees [0] 5/-1/-1->4->9 [1] 5/0/-1->4->12 +gpua015:2678598:2678673 [0] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0 +gpua015:2678598:2678673 [0] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0 +gpua015:2678598:2678673 [0] NCCL INFO Channel 00/0 : 4[7000] -> 5[46000] via P2P/IPC/read +gpua015:2678598:2678673 [0] NCCL INFO Channel 01/0 : 4[7000] -> 5[46000] via P2P/IPC/read +gpua015:2678598:2678673 [0] NCCL INFO Connected all rings +gpua015:2678598:2678673 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [receive] via NET/IB/0 +gpua015:2678598:2678673 [0] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [send] via NET/IB/0 +gpua015:2678598:2678673 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [send] via NET/IB/0 +gpua015:2678598:2678673 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [receive] via NET/IB/0 +gpua015:2678598:2678673 [0] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [receive] via NET/IB/0 +gpua015:2678598:2678673 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [send] via NET/IB/0 +gpua015:2678598:2678673 [0] NCCL INFO Connected all trees +gpua015:2678598:2678673 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua015:2678598:2678673 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua015:2678598:2678673 [0] NCCL INFO comm 0x5031b3d0 rank 4 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua016:2146307:2146307 [3] NCCL INFO cudaDriverVersion 12010 +gpua016:2146307:2146307 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.16<0> +gpua016:2146307:2146307 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua016:2146307:2146396 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.16<0> +gpua016:2146307:2146396 [3] NCCL INFO Using network IB +gpua016:2146307:2146396 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua016:2146307:2146396 [3] NCCL INFO Trees [0] -1/-1/-1->11->10 [1] -1/-1/-1->11->10 +gpua016:2146307:2146396 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0 +gpua016:2146307:2146396 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0 +gpua016:2146307:2146396 [3] NCCL INFO Connected all rings +gpua016:2146307:2146396 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 10[85000] via P2P/IPC/read +gpua016:2146307:2146396 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 10[85000] via P2P/IPC/read +gpua016:2146307:2146396 [3] NCCL INFO Connected all trees +gpua016:2146307:2146396 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua016:2146307:2146396 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua016:2146307:2146396 [3] NCCL INFO comm 0xb69ce0b0 rank 11 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua068:4120162:4120162 [0] NCCL INFO cudaDriverVersion 12010 +gpua068:4120162:4120162 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.68<0> +gpua068:4120162:4120162 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal 
implementation +gpua068:4120162:4120251 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.68<0> +gpua068:4120162:4120251 [0] NCCL INFO Using network IB +gpua068:4120162:4120251 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua068:4120162:4120251 [0] NCCL INFO Trees [0] 45/-1/-1->44->40 [1] 45/36/-1->44->29 +gpua068:4120162:4120251 [0] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0 +gpua068:4120162:4120251 [0] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0 +gpua068:4120162:4120251 [0] NCCL INFO Channel 00/0 : 44[7000] -> 45[46000] via P2P/IPC/read +gpua068:4120162:4120251 [0] NCCL INFO Channel 01/0 : 44[7000] -> 45[46000] via P2P/IPC/read +gpua068:4120162:4120251 [0] NCCL INFO Connected all rings +gpua068:4120162:4120251 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [receive] via NET/IB/0 +gpua068:4120162:4120251 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [receive] via NET/IB/0 +gpua068:4120162:4120251 [0] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [receive] via NET/IB/0 +gpua068:4120162:4120251 [0] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [send] via NET/IB/0 +gpua068:4120162:4120251 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [send] via NET/IB/0 +gpua068:4120162:4120251 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [send] via NET/IB/0 +gpua068:4120162:4120251 [0] NCCL INFO Connected all trees +gpua068:4120162:4120251 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua068:4120162:4120251 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua068:4120162:4120251 [0] NCCL INFO comm 0x955bda0 rank 44 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua068:4120163:4120163 [1] NCCL INFO cudaDriverVersion 12010 +gpua068:4120163:4120163 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.68<0> +gpua068:4120163:4120163 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua068:4120163:4120252 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.68<0> +gpua068:4120163:4120252 [1] NCCL INFO Using network IB +gpua068:4120163:4120252 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua068:4120163:4120252 [1] NCCL INFO Trees [0] 46/-1/-1->45->44 [1] 46/52/-1->45->44 +gpua068:4120163:4120252 [1] NCCL INFO Channel 00/0 : 45[46000] -> 46[85000] via P2P/IPC/read +gpua068:4120163:4120252 [1] NCCL INFO Channel 01/0 : 45[46000] -> 46[85000] via P2P/IPC/read +gpua068:4120163:4120252 [1] NCCL INFO Connected all rings +gpua068:4120163:4120252 [1] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [send] via NET/IB/0 +gpua068:4120163:4120252 [1] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [receive] via NET/IB/0 +gpua068:4120163:4120252 [1] NCCL INFO Channel 00/0 : 45[46000] -> 44[7000] via P2P/IPC/read +gpua068:4120163:4120252 [1] NCCL INFO Channel 01/0 : 45[46000] -> 44[7000] via P2P/IPC/read +gpua068:4120163:4120252 [1] NCCL INFO Connected all trees +gpua068:4120163:4120252 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua068:4120163:4120252 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua068:4120163:4120252 [1] NCCL INFO comm 0x4ffe8620 rank 45 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua068:4120165:4120165 [3] NCCL INFO cudaDriverVersion 12010 +gpua068:4120165:4120165 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.68<0> +gpua068:4120165:4120165 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation 
+gpua068:4120165:4120250 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.68<0> +gpua068:4120165:4120250 [3] NCCL INFO Using network IB +gpua068:4120165:4120250 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua068:4120165:4120250 [3] NCCL INFO Trees [0] -1/-1/-1->47->46 [1] -1/-1/-1->47->46 +gpua068:4120165:4120250 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0 +gpua068:4120165:4120250 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0 +gpua068:4120165:4120250 [3] NCCL INFO Connected all rings +gpua068:4120165:4120250 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 46[85000] via P2P/IPC/read +gpua068:4120165:4120250 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 46[85000] via P2P/IPC/read +gpua068:4120165:4120250 [3] NCCL INFO Connected all trees +gpua068:4120165:4120250 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua068:4120165:4120250 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua068:4120165:4120250 [3] NCCL INFO comm 0x8d5e2480 rank 47 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua068:4120164:4120164 [2] NCCL INFO cudaDriverVersion 12010 +gpua068:4120164:4120164 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.68<0> +gpua068:4120164:4120164 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua068:4120164:4120253 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.68<0> +gpua068:4120164:4120253 [2] NCCL INFO Using network IB +gpua068:4120164:4120253 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua068:4120164:4120253 [2] NCCL INFO Trees [0] 47/-1/-1->46->45 [1] 47/-1/-1->46->45 +gpua068:4120164:4120253 [2] NCCL INFO Channel 00/0 : 46[85000] -> 47[c7000] via P2P/IPC/read +gpua068:4120164:4120253 [2] NCCL INFO Channel 01/0 : 46[85000] -> 47[c7000] via P2P/IPC/read +gpua068:4120164:4120253 [2] NCCL INFO Connected all rings +gpua068:4120164:4120253 [2] NCCL INFO Channel 00/0 : 46[85000] -> 45[46000] via P2P/IPC/read +gpua068:4120164:4120253 [2] NCCL INFO Channel 01/0 : 46[85000] -> 45[46000] via P2P/IPC/read +gpua068:4120164:4120253 [2] NCCL INFO Connected all trees +gpua068:4120164:4120253 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua068:4120164:4120253 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua068:4120164:4120253 [2] NCCL INFO comm 0xb899ffd0 rank 46 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua060:2765420:2765420 [0] NCCL INFO cudaDriverVersion 12010 +gpua060:2765420:2765420 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.60<0> +gpua060:2765420:2765420 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua060:2765420:2765495 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.60<0> +gpua060:2765420:2765495 [0] NCCL INFO Using network IB +gpua060:2765420:2765495 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua060:2765420:2765495 [0] NCCL INFO Trees [0] 33/48/-1->32->0 [1] 33/-1/-1->32->36 +gpua060:2765420:2765495 [0] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0 +gpua060:2765420:2765495 [0] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0 +gpua060:2765420:2765495 [0] NCCL INFO Channel 00/0 : 32[7000] -> 33[46000] via P2P/IPC/read +gpua060:2765420:2765495 [0] NCCL INFO Channel 01/0 : 32[7000] -> 33[46000] via P2P/IPC/read +gpua060:2765420:2765495 [0] NCCL INFO Connected all rings +gpua060:2765420:2765495 [0] NCCL INFO Channel 
01/0 : 32[7000] -> 36[7000] [send] via NET/IB/0 +gpua060:2765420:2765495 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [send] via NET/IB/0 +gpua060:2765420:2765495 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [receive] via NET/IB/0 +gpua060:2765420:2765495 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [send] via NET/IB/0 +gpua060:2765420:2765495 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [receive] via NET/IB/0 +gpua060:2765420:2765495 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [receive] via NET/IB/0 +gpua060:2765420:2765495 [0] NCCL INFO Connected all trees +gpua060:2765420:2765495 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua060:2765420:2765495 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua060:2765420:2765495 [0] NCCL INFO comm 0x8ee9a7d0 rank 32 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua060:2765422:2765422 [2] NCCL INFO cudaDriverVersion 12010 +gpua060:2765422:2765422 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.60<0> +gpua060:2765422:2765422 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua060:2765422:2765496 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.60<0> +gpua060:2765422:2765496 [2] NCCL INFO Using network IB +gpua060:2765422:2765496 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua060:2765422:2765496 [2] NCCL INFO Trees [0] 35/-1/-1->34->33 [1] 35/-1/-1->34->33 +gpua060:2765422:2765496 [2] NCCL INFO Channel 00/0 : 34[85000] -> 35[c7000] via P2P/IPC/read +gpua060:2765422:2765496 [2] NCCL INFO Channel 01/0 : 34[85000] -> 35[c7000] via P2P/IPC/read +gpua060:2765422:2765496 [2] NCCL INFO Connected all rings +gpua060:2765422:2765496 [2] NCCL INFO Channel 00/0 : 34[85000] -> 33[46000] via P2P/IPC/read +gpua060:2765422:2765496 [2] NCCL INFO Channel 01/0 : 34[85000] -> 33[46000] via P2P/IPC/read +gpua060:2765422:2765496 [2] NCCL INFO Connected all trees +gpua060:2765422:2765496 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua060:2765422:2765496 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua060:2765422:2765496 [2] NCCL INFO comm 0x50dc0f50 rank 34 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua041:2383594:2383594 [2] NCCL INFO cudaDriverVersion 12010 +gpua041:2383594:2383594 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.41<0> +gpua041:2383594:2383594 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua041:2383594:2383734 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.41<0> +gpua041:2383594:2383734 [2] NCCL INFO Using network IB +gpua041:2383594:2383734 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua041:2383594:2383734 [2] NCCL INFO Trees [0] 31/-1/-1->30->29 [1] 31/-1/-1->30->29 +gpua041:2383594:2383734 [2] NCCL INFO Channel 00/0 : 30[85000] -> 31[c7000] via P2P/IPC/read +gpua041:2383594:2383734 [2] NCCL INFO Channel 01/0 : 30[85000] -> 31[c7000] via P2P/IPC/read +gpua041:2383594:2383734 [2] NCCL INFO Connected all rings +gpua041:2383594:2383734 [2] NCCL INFO Channel 00/0 : 30[85000] -> 29[46000] via P2P/IPC/read +gpua041:2383594:2383734 [2] NCCL INFO Channel 01/0 : 30[85000] -> 29[46000] via P2P/IPC/read +gpua041:2383594:2383734 [2] NCCL INFO Connected all trees +gpua041:2383594:2383734 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua041:2383594:2383734 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua041:2383594:2383734 [2] NCCL INFO comm 
0xb8aa2570 rank 30 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua062:3999117:3999117 [2] NCCL INFO cudaDriverVersion 12010 +gpua062:3999117:3999117 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.62<0> +gpua062:3999117:3999117 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua062:3999117:3999188 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.62<0> +gpua062:3999117:3999188 [2] NCCL INFO Using network IB +gpua062:3999117:3999188 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua062:3999117:3999188 [2] NCCL INFO Trees [0] 39/-1/-1->38->37 [1] 39/-1/-1->38->37 +gpua062:3999117:3999188 [2] NCCL INFO Channel 00/0 : 38[85000] -> 39[c7000] via P2P/IPC/read +gpua062:3999117:3999188 [2] NCCL INFO Channel 01/0 : 38[85000] -> 39[c7000] via P2P/IPC/read +gpua062:3999117:3999188 [2] NCCL INFO Connected all rings +gpua062:3999117:3999188 [2] NCCL INFO Channel 00/0 : 38[85000] -> 37[46000] via P2P/IPC/read +gpua062:3999117:3999188 [2] NCCL INFO Channel 01/0 : 38[85000] -> 37[46000] via P2P/IPC/read +gpua062:3999117:3999188 [2] NCCL INFO Connected all trees +gpua062:3999117:3999188 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua062:3999117:3999188 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua062:3999117:3999188 [2] NCCL INFO comm 0x5126ca20 rank 38 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua015:2678600:2678600 [2] NCCL INFO cudaDriverVersion 12010 +gpua015:2678600:2678600 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.15<0> +gpua015:2678600:2678600 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua015:2678600:2678674 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.15<0> +gpua015:2678600:2678674 [2] NCCL INFO Using network IB +gpua015:2678600:2678674 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua015:2678600:2678674 [2] NCCL INFO Trees [0] 7/-1/-1->6->5 [1] 7/-1/-1->6->5 +gpua015:2678600:2678674 [2] NCCL INFO Channel 00/0 : 6[85000] -> 7[c7000] via P2P/IPC/read +gpua015:2678600:2678674 [2] NCCL INFO Channel 01/0 : 6[85000] -> 7[c7000] via P2P/IPC/read +gpua015:2678600:2678674 [2] NCCL INFO Connected all rings +gpua015:2678600:2678674 [2] NCCL INFO Channel 00/0 : 6[85000] -> 5[46000] via P2P/IPC/read +gpua015:2678600:2678674 [2] NCCL INFO Channel 01/0 : 6[85000] -> 5[46000] via P2P/IPC/read +gpua015:2678600:2678674 [2] NCCL INFO Connected all trees +gpua015:2678600:2678674 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua015:2678600:2678674 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua015:2678600:2678674 [2] NCCL INFO comm 0x4fb6eec0 rank 6 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua088:4022852:4022852 [3] NCCL INFO cudaDriverVersion 12010 +gpua088:4022852:4022852 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.88<0> +gpua088:4022852:4022852 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua088:4022852:4022933 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.88<0> +gpua088:4022852:4022933 [3] NCCL INFO Using network IB +gpua088:4022852:4022933 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua088:4022852:4022933 [3] NCCL INFO Trees [0] -1/-1/-1->51->50 [1] -1/-1/-1->51->50 +gpua088:4022852:4022933 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0 +gpua088:4022852:4022933 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0 
+gpua088:4022852:4022933 [3] NCCL INFO Connected all rings +gpua088:4022852:4022933 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 50[85000] via P2P/IPC/read +gpua088:4022852:4022933 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 50[85000] via P2P/IPC/read +gpua088:4022852:4022933 [3] NCCL INFO Connected all trees +gpua088:4022852:4022933 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua088:4022852:4022933 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua088:4022852:4022933 [3] NCCL INFO comm 0xb0a25610 rank 51 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua096:2182301:2182301 [3] NCCL INFO cudaDriverVersion 12010 +gpua096:2182301:2182301 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.96<0> +gpua096:2182301:2182301 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua096:2182301:2182382 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.96<0> +gpua096:2182301:2182382 [3] NCCL INFO Using network IB +gpua096:2182301:2182382 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua096:2182301:2182382 [3] NCCL INFO Trees [0] -1/-1/-1->63->62 [1] -1/-1/-1->63->62 +gpua096:2182301:2182382 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0 +gpua096:2182301:2182382 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0 +gpua096:2182301:2182382 [3] NCCL INFO Connected all rings +gpua096:2182301:2182382 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 62[85000] via P2P/IPC/read +gpua096:2182301:2182382 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 62[85000] via P2P/IPC/read +gpua096:2182301:2182382 [3] NCCL INFO Connected all trees +gpua096:2182301:2182382 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua096:2182301:2182382 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua096:2182301:2182382 [3] NCCL INFO comm 0xb7ff8a70 rank 63 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua014:1504761:1504831 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.14<0> +gpua014:1504761:1504831 [0] NCCL INFO Using network IB +gpua014:1504761:1504831 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua014:1504761:1504831 [0] NCCL INFO Channel 00/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpua014:1504761:1504831 [0] NCCL INFO Channel 01/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpua014:1504761:1504831 [0] NCCL INFO Trees [0] 1/32/-1->0->-1 [1] 1/-1/-1->0->4 +gpua014:1504761:1504831 [0] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0 +gpua014:1504761:1504831 [0] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0 +gpua014:1504761:1504831 [0] NCCL INFO Channel 00/0 : 0[7000] -> 1[46000] via P2P/IPC/read +gpua014:1504761:1504831 [0] NCCL INFO Channel 01/0 : 0[7000] -> 1[46000] via P2P/IPC/read +gpua014:1504761:1504831 [0] NCCL INFO Connected all rings +gpua014:1504761:1504831 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [send] via NET/IB/0 +gpua014:1504761:1504831 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [receive] via NET/IB/0 +gpua014:1504761:1504831 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [send] via NET/IB/0 +gpua014:1504761:1504831 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [receive] via NET/IB/0 +gpua014:1504761:1504831 [0] NCCL INFO Connected all trees +gpua014:1504761:1504831 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua014:1504761:1504831 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per 
peer +gpua014:1504761:1504831 [0] NCCL INFO comm 0x4fbe12c0 rank 0 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua093:1851603:1851603 [3] NCCL INFO cudaDriverVersion 12010 +gpua093:1851603:1851603 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.93<0> +gpua093:1851603:1851603 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua093:1851603:1851686 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.93<0> +gpua093:1851603:1851686 [3] NCCL INFO Using network IB +gpua093:1851603:1851686 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua093:1851603:1851686 [3] NCCL INFO Trees [0] -1/-1/-1->59->58 [1] -1/-1/-1->59->58 +gpua093:1851603:1851686 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0 +gpua093:1851603:1851686 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0 +gpua093:1851603:1851686 [3] NCCL INFO Connected all rings +gpua093:1851603:1851686 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 58[85000] via P2P/IPC/read +gpua093:1851603:1851686 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 58[85000] via P2P/IPC/read +gpua093:1851603:1851686 [3] NCCL INFO Connected all trees +gpua093:1851603:1851686 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua093:1851603:1851686 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua093:1851603:1851686 [3] NCCL INFO comm 0x51d23280 rank 59 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua014:1504762:1504762 [1] NCCL INFO cudaDriverVersion 12010 +gpua014:1504762:1504762 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.14<0> +gpua014:1504762:1504762 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua014:1504762:1504832 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.14<0> +gpua014:1504762:1504832 [1] NCCL INFO Using network IB +gpua014:1504762:1504832 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua014:1504762:1504832 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0 +gpua014:1504762:1504832 [1] NCCL INFO Channel 00/0 : 1[46000] -> 2[85000] via P2P/IPC/read +gpua014:1504762:1504832 [1] NCCL INFO Channel 01/0 : 1[46000] -> 2[85000] via P2P/IPC/read +gpua014:1504762:1504832 [1] NCCL INFO Connected all rings +gpua014:1504762:1504832 [1] NCCL INFO Channel 00/0 : 1[46000] -> 0[7000] via P2P/IPC/read +gpua014:1504762:1504832 [1] NCCL INFO Channel 01/0 : 1[46000] -> 0[7000] via P2P/IPC/read +gpua014:1504762:1504832 [1] NCCL INFO Connected all trees +gpua014:1504762:1504832 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua014:1504762:1504832 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua014:1504762:1504832 [1] NCCL INFO comm 0xa6220c0 rank 1 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua062:3999118:3999118 [3] NCCL INFO cudaDriverVersion 12010 +gpua062:3999118:3999118 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.62<0> +gpua062:3999118:3999118 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua062:3999118:3999190 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.62<0> +gpua062:3999118:3999190 [3] NCCL INFO Using network IB +gpua062:3999118:3999190 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua062:3999118:3999190 [3] NCCL INFO Trees [0] -1/-1/-1->39->38 [1] -1/-1/-1->39->38 +gpua062:3999118:3999190 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0 +gpua062:3999118:3999190 [3] NCCL INFO Channel 
01/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0 +gpua062:3999118:3999190 [3] NCCL INFO Connected all rings +gpua062:3999118:3999190 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 38[85000] via P2P/IPC/read +gpua062:3999118:3999190 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 38[85000] via P2P/IPC/read +gpua062:3999118:3999190 [3] NCCL INFO Connected all trees +gpua062:3999118:3999190 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua062:3999118:3999190 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua062:3999118:3999190 [3] NCCL INFO comm 0x4f6c8ad0 rank 39 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua096:2182300:2182300 [2] NCCL INFO cudaDriverVersion 12010 +gpua096:2182300:2182300 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.96<0> +gpua096:2182300:2182300 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua096:2182300:2182383 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.96<0> +gpua096:2182300:2182383 [2] NCCL INFO Using network IB +gpua096:2182300:2182383 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua096:2182300:2182383 [2] NCCL INFO Trees [0] 63/-1/-1->62->61 [1] 63/-1/-1->62->61 +gpua096:2182300:2182383 [2] NCCL INFO Channel 00/0 : 62[85000] -> 63[c7000] via P2P/IPC/read +gpua096:2182300:2182383 [2] NCCL INFO Channel 01/0 : 62[85000] -> 63[c7000] via P2P/IPC/read +gpua096:2182300:2182383 [2] NCCL INFO Connected all rings +gpua096:2182300:2182383 [2] NCCL INFO Channel 00/0 : 62[85000] -> 61[46000] via P2P/IPC/read +gpua096:2182300:2182383 [2] NCCL INFO Channel 01/0 : 62[85000] -> 61[46000] via P2P/IPC/read +gpua096:2182300:2182383 [2] NCCL INFO Connected all trees +gpua096:2182300:2182383 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua096:2182300:2182383 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua096:2182300:2182383 [2] NCCL INFO comm 0x50397010 rank 62 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua020:3382568:3382568 [2] NCCL INFO cudaDriverVersion 12010 +gpua020:3382568:3382568 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.20<0> +gpua020:3382568:3382568 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua020:3382568:3382642 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.20<0> +gpua020:3382568:3382642 [2] NCCL INFO Using network IB +gpua020:3382568:3382642 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua020:3382568:3382642 [2] NCCL INFO Trees [0] 19/-1/-1->18->17 [1] 19/-1/-1->18->17 +gpua020:3382568:3382642 [2] NCCL INFO Channel 00/0 : 18[85000] -> 19[c7000] via P2P/IPC/read +gpua020:3382568:3382642 [2] NCCL INFO Channel 01/0 : 18[85000] -> 19[c7000] via P2P/IPC/read +gpua020:3382568:3382642 [2] NCCL INFO Connected all rings +gpua020:3382568:3382642 [2] NCCL INFO Channel 00/0 : 18[85000] -> 17[46000] via P2P/IPC/read +gpua020:3382568:3382642 [2] NCCL INFO Channel 01/0 : 18[85000] -> 17[46000] via P2P/IPC/read +gpua020:3382568:3382642 [2] NCCL INFO Connected all trees +gpua020:3382568:3382642 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua020:3382568:3382642 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua020:3382568:3382642 [2] NCCL INFO comm 0x4f345750 rank 18 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua093:1851601:1851601 [1] NCCL INFO cudaDriverVersion 12010 +gpua093:1851601:1851601 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.93<0> +gpua093:1851601:1851601 [1] 
NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua093:1851601:1851685 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.93<0> +gpua093:1851601:1851685 [1] NCCL INFO Using network IB +gpua093:1851601:1851685 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua093:1851601:1851685 [1] NCCL INFO Trees [0] 58/52/-1->57->56 [1] 58/-1/-1->57->56 +gpua093:1851601:1851685 [1] NCCL INFO Channel 00/0 : 57[46000] -> 58[85000] via P2P/IPC/read +gpua093:1851601:1851685 [1] NCCL INFO Channel 01/0 : 57[46000] -> 58[85000] via P2P/IPC/read +gpua093:1851601:1851685 [1] NCCL INFO Connected all rings +gpua093:1851601:1851685 [1] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [receive] via NET/IB/0 +gpua093:1851601:1851685 [1] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [send] via NET/IB/0 +gpua093:1851601:1851685 [1] NCCL INFO Channel 00/0 : 57[46000] -> 56[7000] via P2P/IPC/read +gpua093:1851601:1851685 [1] NCCL INFO Channel 01/0 : 57[46000] -> 56[7000] via P2P/IPC/read +gpua093:1851601:1851685 [1] NCCL INFO Connected all trees +gpua093:1851601:1851685 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua093:1851601:1851685 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua093:1851601:1851685 [1] NCCL INFO comm 0x8d2d4770 rank 57 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua016:2146306:2146306 [2] NCCL INFO cudaDriverVersion 12010 +gpua016:2146306:2146306 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.16<0> +gpua016:2146306:2146306 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua016:2146306:2146394 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.16<0> +gpua016:2146306:2146394 [2] NCCL INFO Using network IB +gpua016:2146306:2146394 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua016:2146306:2146394 [2] NCCL INFO Trees [0] 11/-1/-1->10->9 [1] 11/-1/-1->10->9 +gpua016:2146306:2146394 [2] NCCL INFO Channel 00/0 : 10[85000] -> 11[c7000] via P2P/IPC/read +gpua016:2146306:2146394 [2] NCCL INFO Channel 01/0 : 10[85000] -> 11[c7000] via P2P/IPC/read +gpua016:2146306:2146394 [2] NCCL INFO Connected all rings +gpua016:2146306:2146394 [2] NCCL INFO Channel 00/0 : 10[85000] -> 9[46000] via P2P/IPC/read +gpua016:2146306:2146394 [2] NCCL INFO Channel 01/0 : 10[85000] -> 9[46000] via P2P/IPC/read +gpua016:2146306:2146394 [2] NCCL INFO Connected all trees +gpua016:2146306:2146394 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua016:2146306:2146394 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua016:2146306:2146394 [2] NCCL INFO comm 0xb80c42d0 rank 10 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua063:1316625:1316625 [0] NCCL INFO cudaDriverVersion 12010 +gpua063:1316625:1316625 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.63<0> +gpua063:1316625:1316625 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua063:1316625:1316708 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.63<0> +gpua063:1316625:1316708 [0] NCCL INFO Using network IB +gpua063:1316625:1316708 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua063:1316625:1316708 [0] NCCL INFO Trees [0] 41/44/-1->40->49 [1] 41/-1/-1->40->37 +gpua063:1316625:1316708 [0] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0 +gpua063:1316625:1316708 [0] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0 
+gpua063:1316625:1316708 [0] NCCL INFO Channel 00/0 : 40[7000] -> 41[46000] via P2P/IPC/read +gpua063:1316625:1316708 [0] NCCL INFO Channel 01/0 : 40[7000] -> 41[46000] via P2P/IPC/read +gpua063:1316625:1316708 [0] NCCL INFO Connected all rings +gpua063:1316625:1316708 [0] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [receive] via NET/IB/0 +gpua063:1316625:1316708 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [send] via NET/IB/0 +gpua063:1316625:1316708 [0] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [send] via NET/IB/0 +gpua063:1316625:1316708 [0] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [receive] via NET/IB/0 +gpua063:1316625:1316708 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [receive] via NET/IB/0 +gpua063:1316625:1316708 [0] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [send] via NET/IB/0 +gpua063:1316625:1316708 [0] NCCL INFO Connected all trees +gpua063:1316625:1316708 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua063:1316625:1316708 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua063:1316625:1316708 [0] NCCL INFO comm 0x50020800 rank 40 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua088:4022851:4022851 [2] NCCL INFO cudaDriverVersion 12010 +gpua088:4022851:4022851 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.88<0> +gpua088:4022851:4022851 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua088:4022851:4022931 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.88<0> +gpua088:4022851:4022931 [2] NCCL INFO Using network IB +gpua088:4022851:4022931 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua088:4022851:4022931 [2] NCCL INFO Trees [0] 51/-1/-1->50->49 [1] 51/-1/-1->50->49 +gpua088:4022851:4022931 [2] NCCL INFO Channel 00/0 : 50[85000] -> 51[c7000] via P2P/IPC/read +gpua088:4022851:4022931 [2] NCCL INFO Channel 01/0 : 50[85000] -> 51[c7000] via P2P/IPC/read +gpua088:4022851:4022931 [2] NCCL INFO Connected all rings +gpua088:4022851:4022931 [2] NCCL INFO Channel 00/0 : 50[85000] -> 49[46000] via P2P/IPC/read +gpua088:4022851:4022931 [2] NCCL INFO Channel 01/0 : 50[85000] -> 49[46000] via P2P/IPC/read +gpua088:4022851:4022931 [2] NCCL INFO Connected all trees +gpua088:4022851:4022931 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua088:4022851:4022931 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua088:4022851:4022931 [2] NCCL INFO comm 0x8b447690 rank 50 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua021:3546920:3546920 [0] NCCL INFO cudaDriverVersion 12010 +gpua021:3546920:3546920 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.21<0> +gpua021:3546920:3546920 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua021:3546920:3547003 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.21<0> +gpua021:3546920:3547003 [0] NCCL INFO Using network IB +gpua021:3546920:3547003 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua021:3546920:3547003 [0] NCCL INFO Trees [0] 21/-1/-1->20->25 [1] 21/16/-1->20->13 +gpua021:3546920:3547003 [0] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0 +gpua021:3546920:3547003 [0] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0 +gpua021:3546920:3547003 [0] NCCL INFO Channel 00/0 : 20[7000] -> 21[46000] via P2P/IPC/read +gpua021:3546920:3547003 [0] NCCL INFO Channel 01/0 : 20[7000] -> 21[46000] via P2P/IPC/read +gpua021:3546920:3547003 [0] NCCL 
INFO Connected all rings +gpua021:3546920:3547003 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [receive] via NET/IB/0 +gpua021:3546920:3547003 [0] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [send] via NET/IB/0 +gpua021:3546920:3547003 [0] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [receive] via NET/IB/0 +gpua021:3546920:3547003 [0] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [send] via NET/IB/0 +gpua021:3546920:3547003 [0] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [receive] via NET/IB/0 +gpua021:3546920:3547003 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [send] via NET/IB/0 +gpua021:3546920:3547003 [0] NCCL INFO Connected all trees +gpua021:3546920:3547003 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua021:3546920:3547003 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua021:3546920:3547003 [0] NCCL INFO comm 0x91f0590 rank 20 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua020:3382566:3382566 [0] NCCL INFO cudaDriverVersion 12010 +gpua020:3382566:3382566 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.20<0> +gpua020:3382566:3382566 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua020:3382566:3382645 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.20<0> +gpua020:3382566:3382645 [0] NCCL INFO Using network IB +gpua020:3382566:3382645 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua020:3382566:3382645 [0] NCCL INFO Trees [0] 17/24/-1->16->33 [1] 17/-1/-1->16->20 +gpua020:3382566:3382645 [0] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0 +gpua020:3382566:3382645 [0] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0 +gpua020:3382566:3382645 [0] NCCL INFO Channel 00/0 : 16[7000] -> 17[46000] via P2P/IPC/read +gpua020:3382566:3382645 [0] NCCL INFO Channel 01/0 : 16[7000] -> 17[46000] via P2P/IPC/read +gpua020:3382566:3382645 [0] NCCL INFO Connected all rings +gpua020:3382566:3382645 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [send] via NET/IB/0 +gpua020:3382566:3382645 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [send] via NET/IB/0 +gpua020:3382566:3382645 [0] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [send] via NET/IB/0 +gpua020:3382566:3382645 [0] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [receive] via NET/IB/0 +gpua020:3382566:3382645 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [receive] via NET/IB/0 +gpua020:3382566:3382645 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [receive] via NET/IB/0 +gpua020:3382566:3382645 [0] NCCL INFO Connected all trees +gpua020:3382566:3382645 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua020:3382566:3382645 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua020:3382566:3382645 [0] NCCL INFO comm 0x4ff54b70 rank 16 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua096:2182298:2182298 [0] NCCL INFO cudaDriverVersion 12010 +gpua096:2182298:2182298 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.96<0> +gpua096:2182298:2182298 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua096:2182298:2182384 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.96<0> +gpua096:2182298:2182384 [0] NCCL INFO Using network IB +gpua096:2182298:2182384 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua096:2182298:2182384 [0] NCCL INFO Trees [0] 61/-1/-1->60->56 [1] 61/28/-1->60->-1 +gpua096:2182298:2182384 [0] NCCL INFO 
Channel 00/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0 +gpua096:2182298:2182384 [0] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0 +gpua096:2182298:2182384 [0] NCCL INFO Channel 00/0 : 60[7000] -> 61[46000] via P2P/IPC/read +gpua096:2182298:2182384 [0] NCCL INFO Channel 01/0 : 60[7000] -> 61[46000] via P2P/IPC/read +gpua096:2182298:2182384 [0] NCCL INFO Connected all rings +gpua096:2182298:2182384 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [receive] via NET/IB/0 +gpua096:2182298:2182384 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [receive] via NET/IB/0 +gpua096:2182298:2182384 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [send] via NET/IB/0 +gpua096:2182298:2182384 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [send] via NET/IB/0 +gpua096:2182298:2182384 [0] NCCL INFO Connected all trees +gpua096:2182298:2182384 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua096:2182298:2182384 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua096:2182298:2182384 [0] NCCL INFO comm 0x504ff500 rank 60 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua091:1092314:1092314 [3] NCCL INFO cudaDriverVersion 12010 +gpua091:1092314:1092314 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.91<0> +gpua091:1092314:1092314 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua091:1092314:1092404 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.91<0> +gpua091:1092314:1092404 [3] NCCL INFO Using network IB +gpua091:1092314:1092404 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua091:1092314:1092404 [3] NCCL INFO Trees [0] -1/-1/-1->55->54 [1] -1/-1/-1->55->54 +gpua091:1092314:1092404 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0 +gpua091:1092314:1092404 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0 +gpua091:1092314:1092404 [3] NCCL INFO Connected all rings +gpua091:1092314:1092404 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 54[85000] via P2P/IPC/read +gpua091:1092314:1092404 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 54[85000] via P2P/IPC/read +gpua091:1092314:1092404 [3] NCCL INFO Connected all trees +gpua091:1092314:1092404 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua091:1092314:1092404 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua091:1092314:1092404 [3] NCCL INFO comm 0x508531a0 rank 55 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua088:4022849:4022849 [0] NCCL INFO cudaDriverVersion 12010 +gpua088:4022849:4022849 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.88<0> +gpua088:4022849:4022849 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua088:4022849:4022932 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.88<0> +gpua088:4022849:4022932 [0] NCCL INFO Using network IB +gpua088:4022849:4022932 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua088:4022849:4022932 [0] NCCL INFO Trees [0] 49/56/-1->48->32 [1] 49/-1/-1->48->52 +gpua088:4022849:4022932 [0] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0 +gpua088:4022849:4022932 [0] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0 +gpua088:4022849:4022932 [0] NCCL INFO Channel 00/0 : 48[7000] -> 49[46000] via P2P/IPC/read +gpua088:4022849:4022932 [0] NCCL INFO Channel 01/0 : 48[7000] -> 49[46000] via P2P/IPC/read +gpua088:4022849:4022932 [0] NCCL INFO Connected all rings 
+gpua088:4022849:4022932 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [send] via NET/IB/0 +gpua088:4022849:4022932 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [send] via NET/IB/0 +gpua088:4022849:4022932 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [receive] via NET/IB/0 +gpua088:4022849:4022932 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [send] via NET/IB/0 +gpua088:4022849:4022932 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [receive] via NET/IB/0 +gpua088:4022849:4022932 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [receive] via NET/IB/0 +gpua088:4022849:4022932 [0] NCCL INFO Connected all trees +gpua088:4022849:4022932 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua088:4022849:4022932 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua088:4022849:4022932 [0] NCCL INFO comm 0x8e555600 rank 48 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua060:2765423:2765423 [3] NCCL INFO cudaDriverVersion 12010 +gpua060:2765423:2765423 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.60<0> +gpua060:2765423:2765423 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua060:2765423:2765497 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.60<0> +gpua060:2765423:2765497 [3] NCCL INFO Using network IB +gpua060:2765423:2765497 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua060:2765423:2765497 [3] NCCL INFO Trees [0] -1/-1/-1->35->34 [1] -1/-1/-1->35->34 +gpua060:2765423:2765497 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0 +gpua060:2765423:2765497 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0 +gpua060:2765423:2765497 [3] NCCL INFO Connected all rings +gpua060:2765423:2765497 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 34[85000] via P2P/IPC/read +gpua060:2765423:2765497 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 34[85000] via P2P/IPC/read +gpua060:2765423:2765497 [3] NCCL INFO Connected all trees +gpua060:2765423:2765497 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua060:2765423:2765497 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua060:2765423:2765497 [3] NCCL INFO comm 0x8bb0f9c0 rank 35 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua015:2678601:2678601 [3] NCCL INFO cudaDriverVersion 12010 +gpua015:2678601:2678601 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.15<0> +gpua015:2678601:2678601 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua015:2678601:2678671 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.15<0> +gpua015:2678601:2678671 [3] NCCL INFO Using network IB +gpua015:2678601:2678671 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua015:2678601:2678671 [3] NCCL INFO Trees [0] -1/-1/-1->7->6 [1] -1/-1/-1->7->6 +gpua015:2678601:2678671 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0 +gpua015:2678601:2678671 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0 +gpua015:2678601:2678671 [3] NCCL INFO Connected all rings +gpua015:2678601:2678671 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 6[85000] via P2P/IPC/read +gpua015:2678601:2678671 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 6[85000] via P2P/IPC/read +gpua015:2678601:2678671 [3] NCCL INFO Connected all trees +gpua015:2678601:2678671 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua015:2678601:2678671 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer 
+gpua015:2678601:2678671 [3] NCCL INFO comm 0x510fc310 rank 7 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua060:2765421:2765421 [1] NCCL INFO cudaDriverVersion 12010
+gpua060:2765421:2765421 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.60<0>
+gpua060:2765421:2765421 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua060:2765421:2765498 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.60<0>
+gpua060:2765421:2765498 [1] NCCL INFO Using network IB
+gpua060:2765421:2765498 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua060:2765421:2765498 [1] NCCL INFO Trees [0] 34/16/-1->33->32 [1] 34/-1/-1->33->32
+gpua060:2765421:2765498 [1] NCCL INFO Channel 00/0 : 33[46000] -> 34[85000] via P2P/IPC/read
+gpua060:2765421:2765498 [1] NCCL INFO Channel 01/0 : 33[46000] -> 34[85000] via P2P/IPC/read
+gpua060:2765421:2765498 [1] NCCL INFO Connected all rings
+gpua060:2765421:2765498 [1] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [receive] via NET/IB/0
+gpua060:2765421:2765498 [1] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [send] via NET/IB/0
+gpua060:2765421:2765498 [1] NCCL INFO Channel 00/0 : 33[46000] -> 32[7000] via P2P/IPC/read
+gpua060:2765421:2765498 [1] NCCL INFO Channel 01/0 : 33[46000] -> 32[7000] via P2P/IPC/read
+gpua060:2765421:2765498 [1] NCCL INFO Connected all trees
+gpua060:2765421:2765498 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua060:2765421:2765498 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua060:2765421:2765498 [1] NCCL INFO comm 0xf88a8d0 rank 33 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
+[Editor's note: the warning above was emitted verbatim once per worker process; the duplicates have been collapsed to a single instance.]
If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. 
(function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. 
If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. 
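The warning above comes from PyTorch's DistributedDataParallel reducer. A minimal sketch of where the flag is set, assuming torch.distributed has already been initialized by the SLURM launcher as in this run; `wrap_model` is a hypothetical helper, not ESPnet code:

```python
# Minimal sketch (hypothetical helper, not ESPnet code): how the flag in
# the warning above enters. find_unused_parameters=True makes DDP walk
# the autograd graph after every forward pass looking for parameters
# that received no gradient; if the model always uses all parameters,
# the flag can be turned off to skip that traversal.
import torch.nn as nn
from torch.nn.parallel import DistributedDataParallel as DDP

def wrap_model(model: nn.Module, device_id: int) -> DDP:
    # Assumes torch.distributed.init_process_group() has already run,
    # as the distributed launcher in this log arranges.
    return DDP(
        model.to(device_id),
        device_ids=[device_id],
        find_unused_parameters=False,  # drop the extra graph traversal
    )
```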
+[gpua014:0/64] 2023-07-03 02:32:39,445 (trainer:732) INFO: 6epoch:train:1-100batch: iter_time=1.461, forward_time=0.221, loss_ctc=100.864, loss_att=81.642, acc=0.628, loss=87.408, backward_time=0.761, grad_norm=121.325, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.116, optim0_lr0=1.767e-04, train_time=8.651
+[gpua014:0/64] 2023-07-03 02:34:18,760 (trainer:732) INFO: 6epoch:train:101-200batch: iter_time=1.090e-04, forward_time=0.105, loss_ctc=78.378, loss_att=64.090, acc=0.627, loss=68.377, backward_time=0.745, grad_norm=91.037, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.111, optim0_lr0=1.764e-04, train_time=1.987
+[gpua014:0/64] 2023-07-03 02:36:10,033 (trainer:732) INFO: 6epoch:train:201-300batch: iter_time=1.067e-04, forward_time=0.105, loss_ctc=94.972, loss_att=74.236, acc=0.638, loss=80.457, backward_time=0.765, grad_norm=103.237, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.111, optim0_lr0=1.762e-04, train_time=2.225
+[gpua014:0/64] 2023-07-03 02:37:48,817 (trainer:732) INFO: 6epoch:train:301-400batch: iter_time=1.078e-04, forward_time=0.104, loss_ctc=86.070, loss_att=66.443, acc=0.629, loss=72.331, backward_time=0.744, grad_norm=84.955, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.111, optim0_lr0=1.760e-04, train_time=1.975
+[gpua014:0/64] 2023-07-03 02:39:49,150 (trainer:732) INFO: 6epoch:train:401-500batch: iter_time=9.898e-05, forward_time=0.103, loss_ctc=93.921, loss_att=74.105, acc=0.650, loss=80.050, backward_time=0.782, grad_norm=98.045, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.110, optim0_lr0=1.758e-04, train_time=2.406
+[gpua014:0/64] 2023-07-03 02:41:38,188 (trainer:732) INFO: 6epoch:train:501-600batch: iter_time=9.613e-05, forward_time=0.110, loss_ctc=92.212, loss_att=75.110, acc=0.644, loss=80.241, backward_time=0.775, grad_norm=93.984, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.110, optim0_lr0=1.756e-04, train_time=2.181
+[gpua014:0/64] 2023-07-03 02:43:44,084 (trainer:732) INFO: 6epoch:train:601-700batch: iter_time=1.041e-04, forward_time=0.104, loss_ctc=79.713, loss_att=58.628, acc=0.633, loss=64.953, backward_time=0.786, grad_norm=86.077, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.111, optim0_lr0=1.754e-04, train_time=2.518
+[gpua014:0/64] 2023-07-03 02:46:01,463 (trainer:732) INFO: 6epoch:train:701-800batch: iter_time=3.962e-04, forward_time=0.205, loss_ctc=88.209, loss_att=69.865, acc=0.628, loss=75.368, backward_time=0.824,
grad_norm=95.380, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.115, optim0_lr0=1.751e-04, train_time=2.747 +[gpua014:0/64] 2023-07-03 02:48:25,715 (trainer:732) INFO: 6epoch:train:801-900batch: iter_time=1.126e-04, forward_time=0.105, loss_ctc=82.181, loss_att=60.909, acc=0.621, loss=67.291, backward_time=0.809, grad_norm=117.747, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.111, optim0_lr0=1.749e-04, train_time=2.885 +[gpua014:0/64] 2023-07-03 02:50:13,828 (trainer:732) INFO: 6epoch:train:901-1000batch: iter_time=1.053e-04, forward_time=0.105, loss_ctc=82.054, loss_att=69.815, acc=0.652, loss=73.486, backward_time=0.763, grad_norm=82.166, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.111, optim0_lr0=1.747e-04, train_time=2.162 +[gpua014:0/64] 2023-07-03 02:50:33,788 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpua014:0/64] 2023-07-03 02:50:55,708 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 02:50:59,900 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.8", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 02:50:59,900 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.8, +[gpua014:0/64] 2023-07-03 02:50:59,923 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 02:57:26,461 (trainer:732) INFO: 6epoch:train:1001-1100batch: iter_time=2.656, forward_time=0.185, loss_ctc=94.854, loss_att=79.671, acc=0.629, loss=84.226, backward_time=0.765, grad_norm=116.167, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.115, optim0_lr0=1.745e-04, train_time=8.651 +[gpua014:0/64] 2023-07-03 02:59:06,908 (trainer:732) INFO: 6epoch:train:1101-1200batch: iter_time=1.350e-04, forward_time=0.108, loss_ctc=76.456, loss_att=63.432, acc=0.623, loss=67.339, backward_time=0.749, grad_norm=107.085, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.112, optim0_lr0=1.743e-04, train_time=2.010 +[gpua014:0/64] 2023-07-03 03:00:46,333 (trainer:732) INFO: 6epoch:train:1201-1300batch: iter_time=1.352e-04, forward_time=0.108, loss_ctc=93.595, loss_att=74.995, acc=0.638, loss=80.575, backward_time=0.746, grad_norm=87.098, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.112, optim0_lr0=1.741e-04, train_time=1.988 +[gpua014:0/64] 2023-07-03 03:02:31,926 (trainer:732) INFO: 6epoch:train:1301-1400batch: iter_time=1.261e-04, forward_time=0.107, loss_ctc=84.240, loss_att=65.626, acc=0.628, loss=71.210, backward_time=0.757, grad_norm=81.262, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.111, optim0_lr0=1.739e-04, train_time=2.112 +[gpua014:0/64] 2023-07-03 03:04:16,758 (trainer:732) INFO: 6epoch:train:1401-1500batch: iter_time=1.076e-04, forward_time=0.105, loss_ctc=91.705, loss_att=74.473, acc=0.644, loss=79.643, backward_time=0.764, grad_norm=81.849, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.111, optim0_lr0=1.737e-04, train_time=2.096 +[gpua014:0/64] 2023-07-03 03:05:56,207 (trainer:732) INFO: 6epoch:train:1501-1600batch: 
iter_time=1.166e-04, forward_time=0.105, loss_ctc=88.849, loss_att=73.349, acc=0.644, loss=77.999, backward_time=0.746, grad_norm=82.749, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.111, optim0_lr0=1.734e-04, train_time=1.989 +[gpua014:0/64] 2023-07-03 03:07:46,119 (trainer:732) INFO: 6epoch:train:1601-1700batch: iter_time=1.090e-04, forward_time=0.105, loss_ctc=81.011, loss_att=59.311, acc=0.626, loss=65.821, backward_time=0.765, grad_norm=87.128, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.111, optim0_lr0=1.732e-04, train_time=2.198 +[gpua014:0/64] 2023-07-03 03:09:56,784 (trainer:732) INFO: 6epoch:train:1701-1800batch: iter_time=1.036e-04, forward_time=0.113, loss_ctc=86.103, loss_att=69.824, acc=0.617, loss=74.708, backward_time=0.828, grad_norm=86.823, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.111, optim0_lr0=1.730e-04, train_time=2.613 +[gpua014:0/64] 2023-07-03 03:12:03,429 (trainer:732) INFO: 6epoch:train:1801-1900batch: iter_time=9.738e-05, forward_time=0.139, loss_ctc=79.047, loss_att=59.219, acc=0.618, loss=65.167, backward_time=0.783, grad_norm=83.558, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.113, optim0_lr0=1.728e-04, train_time=2.533 +[gpua014:0/64] 2023-07-03 03:14:11,604 (trainer:732) INFO: 6epoch:train:1901-2000batch: iter_time=9.493e-05, forward_time=0.130, loss_ctc=80.506, loss_att=69.250, acc=0.651, loss=72.627, backward_time=0.780, grad_norm=81.945, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.112, optim0_lr0=1.726e-04, train_time=2.563 +[gpua014:0/64] 2023-07-03 03:14:31,249 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpua014:0/64] 2023-07-03 03:14:53,126 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 03:14:57,706 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.1", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 03:14:57,706 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.1, +[gpua014:0/64] 2023-07-03 03:14:57,713 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 03:20:32,697 (trainer:732) INFO: 6epoch:train:2001-2100batch: iter_time=2.337, forward_time=0.203, loss_ctc=92.289, loss_att=78.161, acc=0.643, loss=82.399, backward_time=0.769, grad_norm=99.875, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.115, optim0_lr0=1.724e-04, train_time=7.620 +[gpua014:0/64] 2023-07-03 03:22:30,232 (trainer:732) INFO: 6epoch:train:2101-2200batch: iter_time=1.127e-04, forward_time=0.105, loss_ctc=74.755, loss_att=62.473, acc=0.636, loss=66.158, backward_time=0.770, grad_norm=77.549, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.111, optim0_lr0=1.722e-04, train_time=2.352 +[gpua014:0/64] 2023-07-03 03:24:27,935 (trainer:732) INFO: 6epoch:train:2201-2300batch: iter_time=1.054e-04, forward_time=0.107, loss_ctc=93.934, loss_att=72.433, acc=0.645, loss=78.883, backward_time=0.767, grad_norm=109.491, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.111, 
optim0_lr0=1.720e-04, train_time=2.354 +[gpua014:0/64] 2023-07-03 03:26:25,344 (trainer:732) INFO: 6epoch:train:2301-2400batch: iter_time=1.301e-04, forward_time=0.106, loss_ctc=83.956, loss_att=64.235, acc=0.642, loss=70.151, backward_time=0.796, grad_norm=108.855, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.110, optim0_lr0=1.718e-04, train_time=2.348 +[gpua014:0/64] 2023-07-03 03:28:33,609 (trainer:732) INFO: 6epoch:train:2401-2500batch: iter_time=1.435e-04, forward_time=0.106, loss_ctc=88.720, loss_att=69.805, acc=0.662, loss=75.480, backward_time=0.792, grad_norm=103.908, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.111, optim0_lr0=1.716e-04, train_time=2.565 +[gpua014:0/64] 2023-07-03 03:30:27,664 (trainer:732) INFO: 6epoch:train:2501-2600batch: iter_time=1.432e-04, forward_time=0.106, loss_ctc=88.609, loss_att=72.874, acc=0.650, loss=77.594, backward_time=0.772, grad_norm=86.573, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.111, optim0_lr0=1.714e-04, train_time=2.281 +[gpua014:0/64] 2023-07-03 03:32:26,673 (trainer:732) INFO: 6epoch:train:2601-2700batch: iter_time=1.390e-04, forward_time=0.106, loss_ctc=78.574, loss_att=56.358, acc=0.641, loss=63.022, backward_time=0.785, grad_norm=86.307, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.111, optim0_lr0=1.712e-04, train_time=2.380 +[gpua014:0/64] 2023-07-03 03:34:13,713 (trainer:732) INFO: 6epoch:train:2701-2800batch: iter_time=1.206e-04, forward_time=0.106, loss_ctc=86.005, loss_att=67.629, acc=0.635, loss=73.142, backward_time=0.755, grad_norm=92.662, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.111, optim0_lr0=1.710e-04, train_time=2.141 +[gpua014:0/64] 2023-07-03 03:36:12,217 (trainer:732) INFO: 6epoch:train:2801-2900batch: iter_time=1.203e-04, forward_time=0.107, loss_ctc=77.781, loss_att=57.070, acc=0.637, loss=63.284, backward_time=0.784, grad_norm=81.714, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.111, optim0_lr0=1.708e-04, train_time=2.370 +[gpua014:0/64] 2023-07-03 03:38:12,696 (trainer:732) INFO: 6epoch:train:2901-3000batch: iter_time=1.140e-04, forward_time=0.107, loss_ctc=79.278, loss_att=67.765, acc=0.659, loss=71.219, backward_time=0.784, grad_norm=83.937, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.111, optim0_lr0=1.706e-04, train_time=2.409 +[gpua014:0/64] 2023-07-03 03:38:32,724 (multiple_iter_factory:32) INFO: Building 3th iter-factory... 
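The per-100-batch trainer entries above report loss_ctc, loss_att, and a combined loss. The logged values are consistent with a hybrid CTC/attention objective of the form loss = w * loss_ctc + (1 - w) * loss_att with w = 0.3, which a quick check confirms:

```python
# Back out the CTC weight from the logged values (assumption: the
# combined loss is w * loss_ctc + (1 - w) * loss_att).
loss_ctc, loss_att, loss = 100.864, 81.642, 87.408  # 6epoch:train:1-100batch
w = (loss - loss_att) / (loss_ctc - loss_att)
print(round(w, 3))  # 0.3

# Cross-check against 6epoch:train:101-200batch:
print(round(0.3 * 78.378 + 0.7 * 64.090, 3))  # 68.376, logged as 68.377
```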
+[gpua014:0/64] 2023-07-03 03:38:54,889 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 03:38:59,407 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.7", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 03:38:59,407 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.7, +[gpua014:0/64] 2023-07-03 03:38:59,414 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 03:47:22,887 (trainer:732) INFO: 6epoch:train:3001-3100batch: iter_time=2.049, forward_time=0.203, loss_ctc=93.250, loss_att=76.037, acc=0.648, loss=81.201, backward_time=0.781, grad_norm=111.928, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.114, optim0_lr0=1.704e-04, train_time=11.003 +[gpua014:0/64] 2023-07-03 03:49:03,103 (trainer:732) INFO: 6epoch:train:3101-3200batch: iter_time=1.112e-04, forward_time=0.108, loss_ctc=74.851, loss_att=59.766, acc=0.647, loss=64.291, backward_time=0.749, grad_norm=100.797, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.112, optim0_lr0=1.702e-04, train_time=2.005 +[gpua014:0/64] 2023-07-03 03:50:42,447 (trainer:732) INFO: 6epoch:train:3201-3300batch: iter_time=1.076e-04, forward_time=0.107, loss_ctc=92.365, loss_att=72.114, acc=0.649, loss=78.189, backward_time=0.746, grad_norm=87.314, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.111, optim0_lr0=1.700e-04, train_time=1.987 +[gpua014:0/64] 2023-07-03 03:52:24,350 (trainer:732) INFO: 6epoch:train:3301-3400batch: iter_time=1.112e-04, forward_time=0.107, loss_ctc=82.555, loss_att=64.473, acc=0.642, loss=69.898, backward_time=0.749, grad_norm=85.977, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.112, optim0_lr0=1.698e-04, train_time=2.038 +[gpua014:0/64] 2023-07-03 03:54:09,308 (trainer:732) INFO: 6epoch:train:3401-3500batch: iter_time=1.150e-04, forward_time=0.108, loss_ctc=87.481, loss_att=69.748, acc=0.663, loss=75.068, backward_time=0.755, grad_norm=83.819, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.112, optim0_lr0=1.696e-04, train_time=2.099 +[gpua014:0/64] 2023-07-03 03:55:58,512 (trainer:732) INFO: 6epoch:train:3501-3600batch: iter_time=1.113e-04, forward_time=0.107, loss_ctc=86.174, loss_att=72.123, acc=0.653, loss=76.338, backward_time=0.758, grad_norm=77.571, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.112, optim0_lr0=1.694e-04, train_time=2.184 +[gpua014:0/64] 2023-07-03 03:57:56,904 (trainer:732) INFO: 6epoch:train:3601-3700batch: iter_time=1.023e-04, forward_time=0.106, loss_ctc=77.503, loss_att=56.289, acc=0.640, loss=62.653, backward_time=0.765, grad_norm=85.979, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.112, optim0_lr0=1.692e-04, train_time=2.368 +[gpua014:0/64] 2023-07-03 03:59:46,927 (trainer:732) INFO: 6epoch:train:3701-3800batch: iter_time=1.034e-04, forward_time=0.107, loss_ctc=85.136, loss_att=69.418, acc=0.636, loss=74.133, backward_time=0.758, grad_norm=91.412, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.112, optim0_lr0=1.690e-04, 
train_time=2.200 +[gpua014:0/64] 2023-07-03 04:01:52,708 (trainer:732) INFO: 6epoch:train:3801-3900batch: iter_time=1.068e-04, forward_time=0.107, loss_ctc=77.495, loss_att=56.241, acc=0.639, loss=62.617, backward_time=0.794, grad_norm=87.115, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.112, optim0_lr0=1.688e-04, train_time=2.515 +[gpua014:0/64] 2023-07-03 04:03:48,873 (trainer:732) INFO: 6epoch:train:3901-4000batch: iter_time=9.811e-05, forward_time=0.107, loss_ctc=80.520, loss_att=67.275, acc=0.662, loss=71.248, backward_time=0.771, grad_norm=76.522, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.112, optim0_lr0=1.686e-04, train_time=2.323 +[gpua014:0/64] 2023-07-03 04:03:51,017 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpua014:0/64] 2023-07-03 04:04:13,127 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 04:04:17,367 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.5", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 04:04:17,367 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.5, +[gpua014:0/64] 2023-07-03 04:04:17,471 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 04:09:31,917 (trainer:732) INFO: 6epoch:train:4001-4100batch: iter_time=2.215, forward_time=0.186, loss_ctc=88.842, loss_att=76.014, acc=0.651, loss=79.862, backward_time=0.767, grad_norm=105.736, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.116, optim0_lr0=1.684e-04, train_time=6.860 +[gpua014:0/64] 2023-07-03 04:11:11,674 (trainer:732) INFO: 6epoch:train:4101-4200batch: iter_time=9.981e-05, forward_time=0.105, loss_ctc=74.425, loss_att=59.999, acc=0.649, loss=64.327, backward_time=0.746, grad_norm=81.310, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.683e-04, train_time=1.995 +[gpua014:0/64] 2023-07-03 04:12:51,181 (trainer:732) INFO: 6epoch:train:4201-4300batch: iter_time=9.832e-05, forward_time=0.105, loss_ctc=91.340, loss_att=70.328, acc=0.655, loss=76.631, backward_time=0.747, grad_norm=83.428, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.681e-04, train_time=1.990 +[gpua014:0/64] 2023-07-03 04:14:30,517 (trainer:732) INFO: 6epoch:train:4301-4400batch: iter_time=9.483e-05, forward_time=0.105, loss_ctc=83.233, loss_att=64.260, acc=0.643, loss=69.951, backward_time=0.746, grad_norm=100.262, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.110, optim0_lr0=1.679e-04, train_time=1.986 +[gpua014:0/64] 2023-07-03 04:16:09,792 (trainer:732) INFO: 6epoch:train:4401-4500batch: iter_time=9.761e-05, forward_time=0.105, loss_ctc=90.368, loss_att=70.577, acc=0.662, loss=76.514, backward_time=0.746, grad_norm=81.411, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.677e-04, train_time=1.985 +[gpua014:0/64] 2023-07-03 04:18:01,751 (trainer:732) INFO: 6epoch:train:4501-4600batch: iter_time=1.016e-04, forward_time=0.106, loss_ctc=85.446, loss_att=71.129, acc=0.659, loss=75.424, 
backward_time=0.770, grad_norm=86.667, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.675e-04, train_time=2.239 +[gpua014:0/64] 2023-07-03 04:19:41,277 (trainer:732) INFO: 6epoch:train:4601-4700batch: iter_time=9.728e-05, forward_time=0.106, loss_ctc=76.773, loss_att=54.965, acc=0.648, loss=61.508, backward_time=0.746, grad_norm=87.491, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.673e-04, train_time=1.990 +[gpua014:0/64] 2023-07-03 04:21:39,719 (trainer:732) INFO: 6epoch:train:4701-4800batch: iter_time=9.446e-05, forward_time=0.106, loss_ctc=82.772, loss_att=66.380, acc=0.644, loss=71.297, backward_time=0.777, grad_norm=83.858, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.671e-04, train_time=2.369 +[gpua014:0/64] 2023-07-03 04:23:44,102 (trainer:732) INFO: 6epoch:train:4801-4900batch: iter_time=9.355e-05, forward_time=0.106, loss_ctc=77.056, loss_att=56.067, acc=0.641, loss=62.364, backward_time=0.814, grad_norm=84.528, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.669e-04, train_time=2.487 +[gpua014:0/64] 2023-07-03 04:25:47,722 (trainer:732) INFO: 6epoch:train:4901-5000batch: iter_time=0.007, forward_time=0.203, loss_ctc=80.900, loss_att=69.487, acc=0.659, loss=72.911, backward_time=0.814, grad_norm=91.357, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.116, optim0_lr0=1.668e-04, train_time=2.472 +[gpua014:0/64] 2023-07-03 04:26:07,789 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpua014:0/64] 2023-07-03 04:26:30,505 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 04:26:34,798 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.0", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 04:26:34,799 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.0, +[gpua014:0/64] 2023-07-03 04:26:34,806 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 04:31:31,000 (trainer:732) INFO: 6epoch:train:5001-5100batch: iter_time=2.233, forward_time=0.159, loss_ctc=89.132, loss_att=77.996, acc=0.636, loss=81.337, backward_time=0.767, grad_norm=118.324, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.116, optim0_lr0=1.666e-04, train_time=6.865 +[gpua014:0/64] 2023-07-03 04:33:10,944 (trainer:732) INFO: 6epoch:train:5101-5200batch: iter_time=1.113e-04, forward_time=0.107, loss_ctc=74.757, loss_att=62.723, acc=0.634, loss=66.333, backward_time=0.747, grad_norm=76.771, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.112, optim0_lr0=1.664e-04, train_time=1.999 +[gpua014:0/64] 2023-07-03 04:34:50,322 (trainer:732) INFO: 6epoch:train:5201-5300batch: iter_time=1.083e-04, forward_time=0.107, loss_ctc=94.212, loss_att=73.668, acc=0.649, loss=79.831, backward_time=0.746, grad_norm=95.822, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.112, optim0_lr0=1.662e-04, train_time=1.987 +[gpua014:0/64] 2023-07-03 04:36:29,532 (trainer:732) INFO: 
6epoch:train:5301-5400batch: iter_time=1.232e-04, forward_time=0.106, loss_ctc=81.876, loss_att=63.503, acc=0.639, loss=69.015, backward_time=0.745, grad_norm=77.236, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.660e-04, train_time=1.984 +[gpua014:0/64] 2023-07-03 04:38:18,974 (trainer:732) INFO: 6epoch:train:5401-5500batch: iter_time=1.173e-04, forward_time=0.106, loss_ctc=87.849, loss_att=70.680, acc=0.660, loss=75.831, backward_time=0.767, grad_norm=75.704, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.658e-04, train_time=2.189 +[gpua014:0/64] 2023-07-03 04:40:16,088 (trainer:732) INFO: 6epoch:train:5501-5600batch: iter_time=1.242e-04, forward_time=0.106, loss_ctc=85.866, loss_att=70.686, acc=0.652, loss=75.240, backward_time=0.780, grad_norm=90.447, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.657e-04, train_time=2.342 +[gpua014:0/64] 2023-07-03 04:41:55,475 (trainer:732) INFO: 6epoch:train:5601-5700batch: iter_time=1.210e-04, forward_time=0.105, loss_ctc=75.238, loss_att=56.204, acc=0.638, loss=61.914, backward_time=0.745, grad_norm=84.200, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.655e-04, train_time=1.988 +[gpua014:0/64] 2023-07-03 04:43:38,616 (trainer:732) INFO: 6epoch:train:5701-5800batch: iter_time=1.118e-04, forward_time=0.105, loss_ctc=85.090, loss_att=66.747, acc=0.631, loss=72.250, backward_time=0.751, grad_norm=86.670, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.653e-04, train_time=2.063 +[gpua014:0/64] 2023-07-03 04:45:39,837 (trainer:732) INFO: 6epoch:train:5801-5900batch: iter_time=1.146e-04, forward_time=0.106, loss_ctc=75.722, loss_att=55.597, acc=0.634, loss=61.634, backward_time=0.780, grad_norm=82.304, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.651e-04, train_time=2.424 +[gpua014:0/64] 2023-07-03 04:47:39,154 (trainer:732) INFO: 6epoch:train:5901-6000batch: iter_time=1.108e-04, forward_time=0.107, loss_ctc=77.339, loss_att=66.161, acc=0.663, loss=69.514, backward_time=0.769, grad_norm=77.476, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.649e-04, train_time=2.386 +[gpua014:0/64] 2023-07-03 04:47:59,182 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
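For plotting or comparing runs, the trainer:732 lines are straightforward to scrape. A small hypothetical helper (not part of ESPnet) that turns one such line into a dict of floats:

```python
# Hypothetical log scraper (not part of ESPnet): parse one trainer:732
# line into {metric_name: value}.
import re

METRIC = re.compile(r"(\w+)=([0-9.eE+-]+)")

def parse_trainer_line(line: str) -> dict:
    # Everything after "...batch:" is a comma-separated key=value list.
    _, _, tail = line.partition("batch:")
    return {key: float(value) for key, value in METRIC.findall(tail)}

line = ("[gpua014:0/64] 2023-07-03 04:36:29,532 (trainer:732) INFO: "
        "6epoch:train:5301-5400batch: iter_time=1.232e-04, loss_ctc=81.876, "
        "loss_att=63.503, acc=0.639, loss=69.015, train_time=1.984")
print(parse_trainer_line(line)["loss"])  # 69.015
```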
+[gpua014:0/64] 2023-07-03 04:48:21,691 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 04:48:25,979 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.4", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 04:48:25,979 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.4, +[gpua014:0/64] 2023-07-03 04:48:25,986 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 04:54:06,958 (trainer:732) INFO: 6epoch:train:6001-6100batch: iter_time=1.850, forward_time=0.163, loss_ctc=87.571, loss_att=75.585, acc=0.643, loss=79.181, backward_time=0.768, grad_norm=97.971, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.113, optim0_lr0=1.648e-04, train_time=7.755 +[gpua014:0/64] 2023-07-03 04:55:46,985 (trainer:732) INFO: 6epoch:train:6101-6200batch: iter_time=9.642e-05, forward_time=0.105, loss_ctc=73.381, loss_att=60.787, acc=0.640, loss=64.565, backward_time=0.747, grad_norm=71.943, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.110, optim0_lr0=1.646e-04, train_time=2.001 +[gpua014:0/64] 2023-07-03 04:57:27,443 (trainer:732) INFO: 6epoch:train:6201-6300batch: iter_time=1.004e-04, forward_time=0.105, loss_ctc=90.091, loss_att=70.809, acc=0.655, loss=76.594, backward_time=0.748, grad_norm=127.042, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.110, optim0_lr0=1.644e-04, train_time=2.009 +[gpua014:0/64] 2023-07-03 04:59:11,615 (trainer:732) INFO: 6epoch:train:6301-6400batch: iter_time=9.925e-05, forward_time=0.105, loss_ctc=82.087, loss_att=62.509, acc=0.641, loss=68.382, backward_time=0.769, grad_norm=88.434, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.642e-04, train_time=2.083 +[gpua014:0/64] 2023-07-03 05:00:54,719 (trainer:732) INFO: 6epoch:train:6401-6500batch: iter_time=1.032e-04, forward_time=0.122, loss_ctc=84.899, loss_att=68.086, acc=0.664, loss=73.129, backward_time=0.755, grad_norm=74.987, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.640e-04, train_time=2.059 +[gpua014:0/64] 2023-07-03 05:02:45,070 (trainer:732) INFO: 6epoch:train:6501-6600batch: iter_time=1.054e-04, forward_time=0.105, loss_ctc=83.659, loss_att=69.016, acc=0.660, loss=73.409, backward_time=0.783, grad_norm=84.921, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.110, optim0_lr0=1.639e-04, train_time=2.210 +[gpua014:0/64] 2023-07-03 05:04:29,809 (trainer:732) INFO: 6epoch:train:6601-6700batch: iter_time=1.032e-04, forward_time=0.105, loss_ctc=74.958, loss_att=54.557, acc=0.644, loss=60.678, backward_time=0.756, grad_norm=102.845, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.110, optim0_lr0=1.637e-04, train_time=2.095 +[gpua014:0/64] 2023-07-03 05:06:13,778 (trainer:732) INFO: 6epoch:train:6701-6800batch: iter_time=1.038e-04, forward_time=0.106, loss_ctc=83.016, loss_att=65.807, acc=0.635, loss=70.970, backward_time=0.759, grad_norm=97.302, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.110, optim0_lr0=1.635e-04, 
train_time=2.079 +[gpua014:0/64] 2023-07-03 05:08:19,430 (trainer:732) INFO: 6epoch:train:6801-6900batch: iter_time=1.233e-04, forward_time=0.122, loss_ctc=77.686, loss_att=56.492, acc=0.635, loss=62.850, backward_time=0.800, grad_norm=93.416, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.114, optim0_lr0=1.633e-04, train_time=2.513 +[gpua014:0/64] 2023-07-03 05:10:18,611 (trainer:732) INFO: 6epoch:train:6901-7000batch: iter_time=1.031e-04, forward_time=0.107, loss_ctc=77.728, loss_att=65.800, acc=0.665, loss=69.378, backward_time=0.783, grad_norm=82.751, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.632e-04, train_time=2.383 +[gpua014:0/64] 2023-07-03 05:10:32,073 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpua014:0/64] 2023-07-03 05:10:54,606 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 05:10:58,924 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.3", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 05:10:58,925 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.3, +[gpua014:0/64] 2023-07-03 05:10:58,932 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 05:15:48,763 (trainer:732) INFO: 6epoch:train:7001-7100batch: iter_time=2.120, forward_time=0.144, loss_ctc=88.048, loss_att=75.035, acc=0.650, loss=78.939, backward_time=0.768, grad_norm=95.486, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.112, optim0_lr0=1.630e-04, train_time=6.603 +[gpua014:0/64] 2023-07-03 05:17:28,660 (trainer:732) INFO: 6epoch:train:7101-7200batch: iter_time=9.898e-05, forward_time=0.107, loss_ctc=72.504, loss_att=60.062, acc=0.653, loss=63.794, backward_time=0.747, grad_norm=105.157, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.628e-04, train_time=1.998 +[gpua014:0/64] 2023-07-03 05:19:07,845 (trainer:732) INFO: 6epoch:train:7201-7300batch: iter_time=9.656e-05, forward_time=0.106, loss_ctc=91.719, loss_att=69.674, acc=0.659, loss=76.288, backward_time=0.745, grad_norm=85.801, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.626e-04, train_time=1.983 +[gpua014:0/64] 2023-07-03 05:20:47,324 (trainer:732) INFO: 6epoch:train:7301-7400batch: iter_time=9.789e-05, forward_time=0.107, loss_ctc=80.404, loss_att=61.463, acc=0.651, loss=67.146, backward_time=0.747, grad_norm=78.615, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.625e-04, train_time=1.989 +[gpua014:0/64] 2023-07-03 05:22:43,044 (trainer:732) INFO: 6epoch:train:7401-7500batch: iter_time=1.046e-04, forward_time=0.105, loss_ctc=86.992, loss_att=69.038, acc=0.670, loss=74.424, backward_time=0.768, grad_norm=79.027, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.623e-04, train_time=2.314 +[gpua014:0/64] 2023-07-03 05:24:37,024 (trainer:732) INFO: 6epoch:train:7501-7600batch: iter_time=9.948e-05, forward_time=0.106, loss_ctc=83.558, loss_att=70.280, acc=0.662, loss=74.264, 
backward_time=0.780, grad_norm=76.423, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.621e-04, train_time=2.279 +[gpua014:0/64] 2023-07-03 05:26:23,669 (trainer:732) INFO: 6epoch:train:7601-7700batch: iter_time=9.911e-05, forward_time=0.105, loss_ctc=74.621, loss_att=54.975, acc=0.655, loss=60.869, backward_time=0.757, grad_norm=80.254, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.620e-04, train_time=2.133 +[gpua014:0/64] 2023-07-03 05:28:16,661 (trainer:732) INFO: 6epoch:train:7701-7800batch: iter_time=9.433e-05, forward_time=0.106, loss_ctc=85.326, loss_att=66.803, acc=0.645, loss=72.360, backward_time=0.766, grad_norm=105.083, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.618e-04, train_time=2.260 +[gpua014:0/64] 2023-07-03 05:29:55,886 (trainer:732) INFO: 6epoch:train:7801-7900batch: iter_time=9.898e-05, forward_time=0.105, loss_ctc=75.877, loss_att=55.711, acc=0.645, loss=61.761, backward_time=0.745, grad_norm=86.470, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.616e-04, train_time=1.984 +[gpua014:0/64] 2023-07-03 05:31:49,297 (trainer:732) INFO: 6epoch:train:7901-8000batch: iter_time=9.372e-05, forward_time=0.106, loss_ctc=76.947, loss_att=65.617, acc=0.671, loss=69.016, backward_time=0.768, grad_norm=79.259, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.111, optim0_lr0=1.615e-04, train_time=2.268 +[gpua014:0/64] 2023-07-03 05:31:51,126 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpua014:0/64] 2023-07-03 05:32:13,485 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 05:32:17,730 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.2", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 05:32:17,730 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.2, +[gpua014:0/64] 2023-07-03 05:32:17,737 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 05:37:41,373 (trainer:732) INFO: 6epoch:train:8001-8100batch: iter_time=1.653, forward_time=0.142, loss_ctc=89.665, loss_att=75.977, acc=0.646, loss=80.083, backward_time=0.771, grad_norm=88.148, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.112, optim0_lr0=1.613e-04, train_time=7.041 +[gpua014:0/64] 2023-07-03 05:39:38,281 (trainer:732) INFO: 6epoch:train:8101-8200batch: iter_time=9.604e-05, forward_time=0.105, loss_ctc=73.797, loss_att=60.863, acc=0.641, loss=64.743, backward_time=0.781, grad_norm=77.873, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.111, optim0_lr0=1.611e-04, train_time=2.338 +[gpua014:0/64] 2023-07-03 05:41:36,301 (trainer:732) INFO: 6epoch:train:8201-8300batch: iter_time=1.047e-04, forward_time=0.106, loss_ctc=89.977, loss_att=71.057, acc=0.654, loss=76.733, backward_time=0.780, grad_norm=87.163, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.111, optim0_lr0=1.610e-04, train_time=2.360 +[gpua014:0/64] 2023-07-03 05:43:33,779 (trainer:732) INFO: 
6epoch:train:8301-8400batch: iter_time=9.713e-05, forward_time=0.107, loss_ctc=81.141, loss_att=62.409, acc=0.643, loss=68.028, backward_time=0.780, grad_norm=79.842, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.111, optim0_lr0=1.608e-04, train_time=2.349 +[gpua014:0/64] 2023-07-03 05:45:32,539 (trainer:732) INFO: 6epoch:train:8401-8500batch: iter_time=9.466e-05, forward_time=0.106, loss_ctc=86.755, loss_att=69.796, acc=0.661, loss=74.884, backward_time=0.801, grad_norm=82.255, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.111, optim0_lr0=1.606e-04, train_time=2.375 +[gpua014:0/64] 2023-07-03 05:47:25,967 (trainer:732) INFO: 6epoch:train:8501-8600batch: iter_time=9.814e-05, forward_time=0.106, loss_ctc=84.611, loss_att=69.021, acc=0.659, loss=73.698, backward_time=0.777, grad_norm=88.656, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.111, optim0_lr0=1.605e-04, train_time=2.268 +[gpua014:0/64] 2023-07-03 05:49:19,536 (trainer:732) INFO: 6epoch:train:8601-8700batch: iter_time=9.950e-05, forward_time=0.106, loss_ctc=72.996, loss_att=53.800, acc=0.651, loss=59.559, backward_time=0.784, grad_norm=82.001, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.111, optim0_lr0=1.603e-04, train_time=2.271 +[gpua014:0/64] 2023-07-03 05:51:23,335 (trainer:732) INFO: 6epoch:train:8701-8800batch: iter_time=9.708e-05, forward_time=0.106, loss_ctc=83.993, loss_att=66.774, acc=0.636, loss=71.940, backward_time=0.827, grad_norm=106.452, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.111, optim0_lr0=1.601e-04, train_time=2.476 +[gpua014:0/64] 2023-07-03 05:53:17,393 (trainer:732) INFO: 6epoch:train:8801-8900batch: iter_time=9.769e-05, forward_time=0.106, loss_ctc=77.297, loss_att=55.638, acc=0.636, loss=62.136, backward_time=0.764, grad_norm=84.563, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.111, optim0_lr0=1.600e-04, train_time=2.281 +[gpua014:0/64] 2023-07-03 05:55:39,873 (trainer:732) INFO: 6epoch:train:8901-9000batch: iter_time=5.865e-04, forward_time=0.129, loss_ctc=78.944, loss_att=65.397, acc=0.665, loss=69.461, backward_time=0.808, grad_norm=87.114, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.116, optim0_lr0=1.598e-04, train_time=2.849 +[gpua014:0/64] 2023-07-03 05:55:59,938 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
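Each "Building Nth iter-factory" block above loads one shard of the splits10 training set (note the non-sequential order: split.8, split.1, split.7, split.5, ...), builds a fresh dataset and batch sampler for it, and trains on it before moving on. A schematic sketch of that pattern, with `load_split` as a hypothetical stand-in for the real shard loader in espnet2's multiple_iter_factory:

```python
# Schematic of the multiple-iterator pattern in this log (simplified;
# the real logic lives in espnet2's multiple_iter_factory).
import random
from typing import Iterator, List

def load_split(i: int) -> List[str]:
    # Hypothetical stand-in for reading splits10/<name>/split.<i>.
    return [f"utt_{i}_{j}" for j in range(128)]

def epoch(num_splits: int = 10, seed: int = 0) -> Iterator[List[str]]:
    order = list(range(num_splits))
    random.Random(seed).shuffle(order)  # shard order varies with the seed
    for i in order:                     # one "Building ith iter-factory" each
        yield load_split(i)

for shard in epoch():
    pass  # each shard is then batched (cf. UnsortedBatchSampler above)
```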
+[gpua014:0/64] 2023-07-03 05:56:22,526 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 05:56:26,824 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.6", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 05:56:26,825 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.6, +[gpua014:0/64] 2023-07-03 05:56:26,855 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 06:03:52,584 (trainer:732) INFO: 6epoch:train:9001-9100batch: iter_time=2.073, forward_time=0.141, loss_ctc=88.284, loss_att=74.413, acc=0.652, loss=78.574, backward_time=0.768, grad_norm=114.120, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.112, optim0_lr0=1.596e-04, train_time=9.853 +[gpua014:0/64] 2023-07-03 06:05:32,019 (trainer:732) INFO: 6epoch:train:9101-9200batch: iter_time=1.009e-04, forward_time=0.105, loss_ctc=72.526, loss_att=60.398, acc=0.642, loss=64.036, backward_time=0.747, grad_norm=88.042, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.111, optim0_lr0=1.595e-04, train_time=1.989 +[gpua014:0/64] 2023-07-03 06:07:17,745 (trainer:732) INFO: 6epoch:train:9201-9300batch: iter_time=9.225e-05, forward_time=0.106, loss_ctc=88.224, loss_att=68.416, acc=0.662, loss=74.359, backward_time=0.758, grad_norm=83.370, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.111, optim0_lr0=1.593e-04, train_time=2.114 +[gpua014:0/64] 2023-07-03 06:09:06,317 (trainer:732) INFO: 6epoch:train:9301-9400batch: iter_time=1.235e-04, forward_time=0.113, loss_ctc=79.435, loss_att=61.439, acc=0.647, loss=66.838, backward_time=0.769, grad_norm=85.096, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.112, optim0_lr0=1.591e-04, train_time=2.171 +[gpua014:0/64] 2023-07-03 06:10:51,994 (trainer:732) INFO: 6epoch:train:9401-9500batch: iter_time=1.235e-04, forward_time=0.134, loss_ctc=85.244, loss_att=67.940, acc=0.667, loss=73.131, backward_time=0.759, grad_norm=77.725, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.118, optim0_lr0=1.590e-04, train_time=2.113 +[gpua014:0/64] 2023-07-03 06:12:33,563 (trainer:732) INFO: 6epoch:train:9501-9600batch: iter_time=1.212e-04, forward_time=0.116, loss_ctc=83.273, loss_att=68.461, acc=0.664, loss=72.904, backward_time=0.747, grad_norm=79.840, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.110, optim0_lr0=1.588e-04, train_time=2.031 +[gpua014:0/64] 2023-07-03 06:14:26,421 (trainer:732) INFO: 6epoch:train:9601-9700batch: iter_time=1.196e-04, forward_time=0.107, loss_ctc=73.391, loss_att=53.716, acc=0.650, loss=59.619, backward_time=0.761, grad_norm=81.447, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.110, optim0_lr0=1.587e-04, train_time=2.257 +[gpua014:0/64] 2023-07-03 06:16:27,245 (trainer:732) INFO: 6epoch:train:9701-9800batch: iter_time=2.269e-04, forward_time=0.135, loss_ctc=83.341, loss_att=65.480, acc=0.639, loss=70.839, backward_time=0.769, grad_norm=86.702, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.111, optim0_lr0=1.585e-04, 
train_time=2.416 +[gpua014:0/64] 2023-07-03 06:18:38,458 (trainer:732) INFO: 6epoch:train:9801-9900batch: iter_time=1.187e-04, forward_time=0.105, loss_ctc=75.430, loss_att=54.355, acc=0.644, loss=60.678, backward_time=0.817, grad_norm=86.330, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.110, optim0_lr0=1.583e-04, train_time=2.624 +[gpua014:0/64] 2023-07-03 06:20:41,062 (trainer:732) INFO: 6epoch:train:9901-10000batch: iter_time=1.247e-04, forward_time=0.105, loss_ctc=78.271, loss_att=65.479, acc=0.666, loss=69.316, backward_time=0.787, grad_norm=81.913, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.110, optim0_lr0=1.582e-04, train_time=2.451 +[gpua014:0/64] 2023-07-03 06:32:34,851 (trainer:338) INFO: 6epoch results: [train] iter_time=0.207, forward_time=0.117, loss_ctc=83.321, loss_att=66.284, acc=0.646, loss=71.395, backward_time=0.769, grad_norm=89.618, clip=100.000, loss_scale=1.342e+08, optim_step_time=0.112, optim0_lr0=1.669e-04, train_time=2.823, time=3 hours, 55 minutes and 37.95 seconds, total_count=30000, gpu_max_cached_mem_GB=34.184, [valid] loss_ctc=65.053, cer_ctc=0.342, loss_att=52.110, acc=0.582, cer=0.475, wer=1.000, loss=55.993, time=5 minutes and 34.4 seconds, total_count=3542, gpu_max_cached_mem_GB=37.479, [att_plot] time=5 minutes and 55.66 seconds, total_count=0, gpu_max_cached_mem_GB=37.479 +[gpua014:0/64] 2023-07-03 06:32:53,824 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count +[gpua014:0/64] 2023-07-03 06:32:53,855 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/1epoch.pth +[gpua014:0/64] 2023-07-03 06:32:53,943 (trainer:272) INFO: 7/100epoch started. Estimated time to finish: 2 weeks, 2 days and 3 hours +[gpua014:0/64] 2023-07-03 06:32:55,418 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
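[Editor's note] The reported loss is a fixed blend of the CTC and attention branches. The training config is not reproduced in this log, but every logged (loss_ctc, loss_att, loss) triple is consistent with ctc_weight = 0.3 (an inference from the numbers, not a quoted setting); checking it against the 6epoch [train] summary above:

```python
# Hybrid CTC/attention objective: loss = w * loss_ctc + (1 - w) * loss_att,
# with w = 0.3 inferred from the logged values (the config is not in this log).
w = 0.3
loss_ctc, loss_att = 83.321, 66.284     # 6epoch [train] summary above
print(round(w * loss_ctc + (1 - w) * loss_att, 3))   # 71.395, as logged
```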
+[gpua014:0/64] 2023-07-03 06:33:18,973 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 06:33:23,949 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.0", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 06:33:23,949 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.0, +[gpua014:0/64] 2023-07-03 06:33:24,009 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 06:46:45,718 (trainer:732) INFO: 7epoch:train:1-100batch: iter_time=7.202, forward_time=0.177, loss_ctc=95.276, loss_att=81.548, acc=0.618, loss=85.666, backward_time=0.772, grad_norm=101.106, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.115, optim0_lr0=1.580e-04, train_time=16.619 +[gpua014:0/64] 2023-07-03 06:48:26,044 (trainer:732) INFO: 7epoch:train:101-200batch: iter_time=1.246e-04, forward_time=0.108, loss_ctc=78.282, loss_att=64.557, acc=0.642, loss=68.675, backward_time=0.755, grad_norm=81.816, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.113, optim0_lr0=1.579e-04, train_time=2.006 +[gpua014:0/64] 2023-07-03 06:50:06,508 (trainer:732) INFO: 7epoch:train:201-300batch: iter_time=1.371e-04, forward_time=0.109, loss_ctc=98.448, loss_att=70.985, acc=0.654, loss=79.224, backward_time=0.755, grad_norm=111.819, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.113, optim0_lr0=1.577e-04, train_time=2.009 +[gpua014:0/64] 2023-07-03 06:51:50,760 (trainer:732) INFO: 7epoch:train:301-400batch: iter_time=3.589e-04, forward_time=0.117, loss_ctc=68.770, loss_att=57.090, acc=0.637, loss=60.594, backward_time=0.758, grad_norm=72.128, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.114, optim0_lr0=1.576e-04, train_time=2.083 +[gpua014:0/64] 2023-07-03 06:53:47,407 (trainer:732) INFO: 7epoch:train:401-500batch: iter_time=1.296e-04, forward_time=0.219, loss_ctc=73.520, loss_att=63.518, acc=0.630, loss=66.518, backward_time=0.767, grad_norm=81.990, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.152, optim0_lr0=1.574e-04, train_time=2.334 +[gpua014:0/64] 2023-07-03 06:55:27,528 (trainer:732) INFO: 7epoch:train:501-600batch: iter_time=1.308e-04, forward_time=0.111, loss_ctc=93.523, loss_att=72.063, acc=0.620, loss=78.501, backward_time=0.751, grad_norm=104.469, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.113, optim0_lr0=1.572e-04, train_time=2.002 +[gpua014:0/64] 2023-07-03 06:57:26,998 (trainer:732) INFO: 7epoch:train:601-700batch: iter_time=9.820e-05, forward_time=0.108, loss_ctc=85.199, loss_att=67.503, acc=0.642, loss=72.812, backward_time=0.817, grad_norm=89.793, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.113, optim0_lr0=1.571e-04, train_time=2.389 +[gpua014:0/64] 2023-07-03 06:59:11,942 (trainer:732) INFO: 7epoch:train:701-800batch: iter_time=9.532e-05, forward_time=0.106, loss_ctc=75.795, loss_att=59.124, acc=0.652, loss=64.125, backward_time=0.761, grad_norm=72.838, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.113, optim0_lr0=1.569e-04, train_time=2.099 
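[Editor's note] optim0_lr0 decays smoothly from ~1.608e-04 toward ~1.444e-04 across these two epochs, matching a warmup-then-inverse-square-root schedule with the peak LR 2.5e-4 and 10k warmup steps named in the experiment directory. A sketch of that schedule; the step indices below are inferred from the logged LR values themselves, since the mapping from logged batch counts to optimizer steps (gradient accumulation, resume offset) is not shown in this excerpt:

```python
import math

def warmup_lr(step, peak=2.5e-4, warmup=10000):
    # Linear warmup to `peak`, then inverse-square-root decay
    # (peak and warmup taken from the experiment name: lr2.5e-4, warmup10k).
    return peak * min(step / warmup, math.sqrt(warmup / step))

print(f"{warmup_lr(25000):.3e}")   # 1.581e-04 ~ optim0_lr0 at the end of epoch 6
print(f"{warmup_lr(30000):.3e}")   # 1.443e-04 ~ optim0_lr0 at the end of epoch 7
```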
+[gpua014:0/64] 2023-07-03 07:01:05,504 (trainer:732) INFO: 7epoch:train:801-900batch: iter_time=9.391e-05, forward_time=0.106, loss_ctc=80.987, loss_att=61.948, acc=0.629, loss=67.660, backward_time=0.771, grad_norm=169.450, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.113, optim0_lr0=1.568e-04, train_time=2.271 +[gpua014:0/64] 2023-07-03 07:02:59,172 (trainer:732) INFO: 7epoch:train:901-1000batch: iter_time=9.789e-05, forward_time=0.107, loss_ctc=94.412, loss_att=71.680, acc=0.634, loss=78.499, backward_time=0.781, grad_norm=87.757, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.113, optim0_lr0=1.566e-04, train_time=2.273 +[gpua014:0/64] 2023-07-03 07:03:13,496 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpua014:0/64] 2023-07-03 07:03:35,626 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 07:03:40,087 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.2", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 07:03:40,087 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.2, +[gpua014:0/64] 2023-07-03 07:03:40,095 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 07:11:32,069 (trainer:732) INFO: 7epoch:train:1001-1100batch: iter_time=2.149, forward_time=0.164, loss_ctc=91.941, loss_att=80.290, acc=0.624, loss=83.785, backward_time=0.772, grad_norm=101.163, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.114, optim0_lr0=1.565e-04, train_time=10.258 +[gpua014:0/64] 2023-07-03 07:13:18,931 (trainer:732) INFO: 7epoch:train:1101-1200batch: iter_time=1.172e-04, forward_time=0.107, loss_ctc=79.552, loss_att=63.497, acc=0.642, loss=68.313, backward_time=0.757, grad_norm=87.614, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.113, optim0_lr0=1.563e-04, train_time=2.137 +[gpua014:0/64] 2023-07-03 07:14:58,837 (trainer:732) INFO: 7epoch:train:1201-1300batch: iter_time=1.165e-04, forward_time=0.107, loss_ctc=95.280, loss_att=67.963, acc=0.658, loss=76.158, backward_time=0.752, grad_norm=166.924, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.112, optim0_lr0=1.562e-04, train_time=1.998 +[gpua014:0/64] 2023-07-03 07:16:47,297 (trainer:732) INFO: 7epoch:train:1301-1400batch: iter_time=1.161e-04, forward_time=0.106, loss_ctc=68.366, loss_att=56.026, acc=0.640, loss=59.728, backward_time=0.771, grad_norm=76.276, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.112, optim0_lr0=1.560e-04, train_time=2.169 +[gpua014:0/64] 2023-07-03 07:18:37,930 (trainer:732) INFO: 7epoch:train:1401-1500batch: iter_time=1.157e-04, forward_time=0.106, loss_ctc=72.529, loss_att=61.471, acc=0.638, loss=64.789, backward_time=0.784, grad_norm=79.624, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.112, optim0_lr0=1.559e-04, train_time=2.212 +[gpua014:0/64] 2023-07-03 07:20:20,428 (trainer:732) INFO: 7epoch:train:1501-1600batch: iter_time=1.275e-04, forward_time=0.107, loss_ctc=92.853, loss_att=71.159, acc=0.623, loss=77.667, backward_time=0.755, 
grad_norm=97.225, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.112, optim0_lr0=1.557e-04, train_time=2.050 +[gpua014:0/64] 2023-07-03 07:22:06,737 (trainer:732) INFO: 7epoch:train:1601-1700batch: iter_time=1.174e-04, forward_time=0.107, loss_ctc=82.892, loss_att=64.823, acc=0.651, loss=70.244, backward_time=0.761, grad_norm=102.633, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.113, optim0_lr0=1.556e-04, train_time=2.126 +[gpua014:0/64] 2023-07-03 07:24:10,142 (trainer:732) INFO: 7epoch:train:1701-1800batch: iter_time=1.456e-04, forward_time=0.106, loss_ctc=74.365, loss_att=57.403, acc=0.655, loss=62.492, backward_time=0.797, grad_norm=72.839, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.112, optim0_lr0=1.554e-04, train_time=2.468 +[gpua014:0/64] 2023-07-03 07:26:14,745 (trainer:732) INFO: 7epoch:train:1801-1900batch: iter_time=1.130e-04, forward_time=0.107, loss_ctc=76.773, loss_att=58.806, acc=0.640, loss=64.196, backward_time=0.803, grad_norm=78.057, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.112, optim0_lr0=1.553e-04, train_time=2.492 +[gpua014:0/64] 2023-07-03 07:28:26,877 (trainer:732) INFO: 7epoch:train:1901-2000batch: iter_time=1.142e-04, forward_time=0.107, loss_ctc=93.969, loss_att=70.125, acc=0.636, loss=77.279, backward_time=0.796, grad_norm=91.970, clip=100.000, loss_scale=2.684e+08, optim_step_time=0.112, optim0_lr0=1.551e-04, train_time=2.642 +[gpua014:0/64] 2023-07-03 07:28:46,906 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpua014:0/64] 2023-07-03 07:29:09,281 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 07:29:13,884 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.9", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 07:29:13,884 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.9, +[gpua014:0/64] 2023-07-03 07:29:13,892 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 07:34:52,459 (trainer:732) INFO: 7epoch:train:2001-2100batch: iter_time=2.218, forward_time=0.170, loss_ctc=90.821, loss_att=77.154, acc=0.640, loss=81.254, backward_time=0.770, grad_norm=94.828, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.114, optim0_lr0=1.550e-04, train_time=7.711 +[gpua014:0/64] 2023-07-03 07:36:51,476 (trainer:732) INFO: 7epoch:train:2101-2200batch: iter_time=1.022e-04, forward_time=0.107, loss_ctc=77.592, loss_att=64.767, acc=0.657, loss=68.614, backward_time=0.798, grad_norm=91.890, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.112, optim0_lr0=1.548e-04, train_time=2.380 +[gpua014:0/64] 2023-07-03 07:38:47,649 (trainer:732) INFO: 7epoch:train:2201-2300batch: iter_time=1.175e-04, forward_time=0.108, loss_ctc=93.080, loss_att=64.886, acc=0.669, loss=73.345, backward_time=0.796, grad_norm=102.296, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.112, optim0_lr0=1.547e-04, train_time=2.323 +[gpua014:0/64] 2023-07-03 07:40:46,777 (trainer:732) INFO: 7epoch:train:2301-2400batch: 
iter_time=1.174e-04, forward_time=0.107, loss_ctc=69.893, loss_att=57.571, acc=0.647, loss=61.267, backward_time=0.774, grad_norm=72.489, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.112, optim0_lr0=1.545e-04, train_time=2.382 +[gpua014:0/64] 2023-07-03 07:42:36,668 (trainer:732) INFO: 7epoch:train:2401-2500batch: iter_time=1.217e-04, forward_time=0.108, loss_ctc=71.496, loss_att=60.242, acc=0.652, loss=63.618, backward_time=0.765, grad_norm=87.366, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.112, optim0_lr0=1.544e-04, train_time=2.198 +[gpua014:0/64] 2023-07-03 07:44:33,450 (trainer:732) INFO: 7epoch:train:2501-2600batch: iter_time=1.202e-04, forward_time=0.107, loss_ctc=93.104, loss_att=71.943, acc=0.630, loss=78.291, backward_time=0.779, grad_norm=117.869, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.112, optim0_lr0=1.542e-04, train_time=2.335 +[gpua014:0/64] 2023-07-03 07:46:44,195 (trainer:732) INFO: 7epoch:train:2601-2700batch: iter_time=1.161e-04, forward_time=0.108, loss_ctc=82.752, loss_att=66.339, acc=0.656, loss=71.263, backward_time=0.798, grad_norm=94.956, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.112, optim0_lr0=1.541e-04, train_time=2.615 +[gpua014:0/64] 2023-07-03 07:49:06,512 (trainer:732) INFO: 7epoch:train:2701-2800batch: iter_time=1.162e-04, forward_time=0.107, loss_ctc=76.092, loss_att=57.947, acc=0.662, loss=63.391, backward_time=0.854, grad_norm=96.852, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.112, optim0_lr0=1.539e-04, train_time=2.846 +[gpua014:0/64] 2023-07-03 07:51:15,092 (trainer:732) INFO: 7epoch:train:2801-2900batch: iter_time=1.231e-04, forward_time=0.107, loss_ctc=77.220, loss_att=60.708, acc=0.646, loss=65.661, backward_time=0.780, grad_norm=79.761, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.112, optim0_lr0=1.538e-04, train_time=2.571 +[gpua014:0/64] 2023-07-03 07:53:30,532 (trainer:732) INFO: 7epoch:train:2901-3000batch: iter_time=1.155e-04, forward_time=0.107, loss_ctc=92.640, loss_att=69.053, acc=0.651, loss=76.129, backward_time=0.817, grad_norm=105.478, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.112, optim0_lr0=1.536e-04, train_time=2.709 +[gpua014:0/64] 2023-07-03 07:53:32,581 (multiple_iter_factory:32) INFO: Building 3th iter-factory... 
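[Editor's note] loss_scale doubles at regular intervals in this log (2.684e+08 -> 5.369e+08 at 7epoch batch ~2001, then 1.074e+09, 2.147e+09, 4.295e+09), each jump roughly 4000 batches apart. That is the signature of dynamic loss scaling under mixed precision: PyTorch's GradScaler, for instance, multiplies the scale by growth_factor=2.0 after every growth_interval=2000 optimizer steps without inf/NaN gradients, which lines up with ~4000 batches here if two batches are accumulated per step (an inference, as above). A toy reproduction of that bookkeeping:

```python
# Toy reproduction of the loss_scale trajectory, assuming GradScaler-style
# dynamics (double the scale every 2000 consecutive overflow-free steps).
scale, growth_interval, good_steps = 2.684e8, 2000, 0
for _ in range(6000):                    # ~6000 optimizer steps, no overflow
    good_steps += 1                      # a real scaler would instead halve
    if good_steps == growth_interval:    # `scale` on an inf/NaN gradient
        scale *= 2.0
        good_steps = 0
print(f"{scale:.3e}")                    # 2.147e+09, the value seen early in epoch 8
```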
+[gpua014:0/64] 2023-07-03 07:53:54,651 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 07:53:59,259 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.1", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 07:53:59,259 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.1, +[gpua014:0/64] 2023-07-03 07:53:59,267 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 08:01:50,181 (trainer:732) INFO: 7epoch:train:3001-3100batch: iter_time=1.542, forward_time=0.157, loss_ctc=89.125, loss_att=75.893, acc=0.641, loss=79.863, backward_time=0.770, grad_norm=101.349, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.115, optim0_lr0=1.535e-04, train_time=9.993 +[gpua014:0/64] 2023-07-03 08:03:31,460 (trainer:732) INFO: 7epoch:train:3101-3200batch: iter_time=1.089e-04, forward_time=0.109, loss_ctc=75.488, loss_att=61.887, acc=0.663, loss=65.968, backward_time=0.756, grad_norm=75.186, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.534e-04, train_time=2.025 +[gpua014:0/64] 2023-07-03 08:05:29,565 (trainer:732) INFO: 7epoch:train:3201-3300batch: iter_time=1.154e-04, forward_time=0.108, loss_ctc=93.877, loss_att=64.871, acc=0.673, loss=73.573, backward_time=0.778, grad_norm=102.635, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.532e-04, train_time=2.362 +[gpua014:0/64] 2023-07-03 08:07:28,930 (trainer:732) INFO: 7epoch:train:3301-3400batch: iter_time=1.155e-04, forward_time=0.107, loss_ctc=67.271, loss_att=55.343, acc=0.658, loss=58.921, backward_time=0.803, grad_norm=76.950, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.531e-04, train_time=2.387 +[gpua014:0/64] 2023-07-03 08:09:13,964 (trainer:732) INFO: 7epoch:train:3401-3500batch: iter_time=1.163e-04, forward_time=0.107, loss_ctc=70.710, loss_att=59.768, acc=0.654, loss=63.051, backward_time=0.755, grad_norm=75.149, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.529e-04, train_time=2.100 +[gpua014:0/64] 2023-07-03 08:11:10,680 (trainer:732) INFO: 7epoch:train:3501-3600batch: iter_time=1.144e-04, forward_time=0.108, loss_ctc=89.658, loss_att=70.387, acc=0.636, loss=76.168, backward_time=0.767, grad_norm=89.805, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.528e-04, train_time=2.334 +[gpua014:0/64] 2023-07-03 08:13:17,684 (trainer:732) INFO: 7epoch:train:3601-3700batch: iter_time=6.304e-04, forward_time=0.165, loss_ctc=82.508, loss_att=65.475, acc=0.661, loss=70.585, backward_time=0.819, grad_norm=78.875, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.118, optim0_lr0=1.526e-04, train_time=2.539 +[gpua014:0/64] 2023-07-03 08:15:30,212 (trainer:732) INFO: 7epoch:train:3701-3800batch: iter_time=1.124e-04, forward_time=0.109, loss_ctc=72.864, loss_att=56.506, acc=0.669, loss=61.413, backward_time=0.808, grad_norm=70.706, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.525e-04, 
train_time=2.651 +[gpua014:0/64] 2023-07-03 08:17:47,257 (trainer:732) INFO: 7epoch:train:3801-3900batch: iter_time=1.222e-04, forward_time=0.107, loss_ctc=77.452, loss_att=59.300, acc=0.648, loss=64.746, backward_time=0.810, grad_norm=91.894, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.524e-04, train_time=2.741 +[gpua014:0/64] 2023-07-03 08:19:44,368 (trainer:732) INFO: 7epoch:train:3901-4000batch: iter_time=1.179e-04, forward_time=0.108, loss_ctc=91.937, loss_att=68.717, acc=0.652, loss=75.683, backward_time=0.778, grad_norm=90.949, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.522e-04, train_time=2.342 +[gpua014:0/64] 2023-07-03 08:19:59,462 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpua014:0/64] 2023-07-03 08:20:21,953 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 08:20:26,425 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.6", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 08:20:26,425 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.6, +[gpua014:0/64] 2023-07-03 08:20:26,432 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 08:28:58,296 (trainer:732) INFO: 7epoch:train:4001-4100batch: iter_time=2.615, forward_time=0.108, loss_ctc=87.423, loss_att=75.461, acc=0.635, loss=79.049, backward_time=0.770, grad_norm=94.678, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.521e-04, train_time=11.078 +[gpua014:0/64] 2023-07-03 08:30:38,954 (trainer:732) INFO: 7epoch:train:4101-4200batch: iter_time=1.380e-04, forward_time=0.108, loss_ctc=75.872, loss_att=62.613, acc=0.654, loss=66.591, backward_time=0.755, grad_norm=82.364, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.519e-04, train_time=2.013 +[gpua014:0/64] 2023-07-03 08:32:19,075 (trainer:732) INFO: 7epoch:train:4201-4300batch: iter_time=1.023e-04, forward_time=0.107, loss_ctc=94.042, loss_att=66.149, acc=0.664, loss=74.517, backward_time=0.753, grad_norm=117.332, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.518e-04, train_time=2.002 +[gpua014:0/64] 2023-07-03 08:34:01,605 (trainer:732) INFO: 7epoch:train:4301-4400batch: iter_time=9.928e-05, forward_time=0.107, loss_ctc=66.175, loss_att=55.333, acc=0.650, loss=58.585, backward_time=0.759, grad_norm=81.191, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.517e-04, train_time=2.050 +[gpua014:0/64] 2023-07-03 08:36:00,009 (trainer:732) INFO: 7epoch:train:4401-4500batch: iter_time=1.002e-04, forward_time=0.106, loss_ctc=70.262, loss_att=59.536, acc=0.653, loss=62.754, backward_time=0.774, grad_norm=75.645, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.112, optim0_lr0=1.515e-04, train_time=2.368 +[gpua014:0/64] 2023-07-03 08:38:05,032 (trainer:732) INFO: 7epoch:train:4501-4600batch: iter_time=1.049e-04, forward_time=0.106, loss_ctc=89.583, loss_att=69.758, acc=0.629, loss=75.705, 
backward_time=0.786, grad_norm=99.400, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.112, optim0_lr0=1.514e-04, train_time=2.500 +[gpua014:0/64] 2023-07-03 08:39:50,778 (trainer:732) INFO: 7epoch:train:4601-4700batch: iter_time=1.036e-04, forward_time=0.106, loss_ctc=84.138, loss_att=65.587, acc=0.655, loss=71.152, backward_time=0.765, grad_norm=92.075, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.112, optim0_lr0=1.512e-04, train_time=2.115 +[gpua014:0/64] 2023-07-03 08:41:40,638 (trainer:732) INFO: 7epoch:train:4701-4800batch: iter_time=1.006e-04, forward_time=0.106, loss_ctc=73.197, loss_att=56.452, acc=0.665, loss=61.475, backward_time=0.766, grad_norm=80.960, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.112, optim0_lr0=1.511e-04, train_time=2.197 +[gpua014:0/64] 2023-07-03 08:43:38,356 (trainer:732) INFO: 7epoch:train:4801-4900batch: iter_time=9.723e-05, forward_time=0.106, loss_ctc=76.829, loss_att=59.725, acc=0.640, loss=64.856, backward_time=0.772, grad_norm=83.461, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.510e-04, train_time=2.354 +[gpua014:0/64] 2023-07-03 08:45:27,134 (trainer:732) INFO: 7epoch:train:4901-5000batch: iter_time=9.801e-05, forward_time=0.107, loss_ctc=91.616, loss_att=68.958, acc=0.640, loss=75.755, backward_time=0.761, grad_norm=96.348, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.508e-04, train_time=2.175 +[gpua014:0/64] 2023-07-03 08:45:28,909 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpua014:0/64] 2023-07-03 08:45:51,050 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 08:45:55,336 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.7", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 08:45:55,336 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.7, +[gpua014:0/64] 2023-07-03 08:45:55,343 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 08:51:32,941 (trainer:732) INFO: 7epoch:train:5001-5100batch: iter_time=1.602, forward_time=0.172, loss_ctc=88.036, loss_att=75.518, acc=0.648, loss=79.274, backward_time=0.778, grad_norm=136.654, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.115, optim0_lr0=1.507e-04, train_time=7.316 +[gpua014:0/64] 2023-07-03 08:53:29,322 (trainer:732) INFO: 7epoch:train:5101-5200batch: iter_time=1.157e-04, forward_time=0.107, loss_ctc=76.596, loss_att=62.630, acc=0.665, loss=66.820, backward_time=0.782, grad_norm=85.789, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.505e-04, train_time=2.328 +[gpua014:0/64] 2023-07-03 08:55:20,487 (trainer:732) INFO: 7epoch:train:5201-5300batch: iter_time=1.116e-04, forward_time=0.107, loss_ctc=93.002, loss_att=64.591, acc=0.674, loss=73.115, backward_time=0.767, grad_norm=100.095, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.504e-04, train_time=2.223 +[gpua014:0/64] 2023-07-03 08:57:20,388 (trainer:732) INFO: 
7epoch:train:5301-5400batch: iter_time=1.073e-04, forward_time=0.108, loss_ctc=66.339, loss_att=55.811, acc=0.659, loss=58.970, backward_time=0.795, grad_norm=74.518, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.503e-04, train_time=2.398 +[gpua014:0/64] 2023-07-03 08:59:11,806 (trainer:732) INFO: 7epoch:train:5401-5500batch: iter_time=1.043e-04, forward_time=0.109, loss_ctc=70.391, loss_att=58.922, acc=0.662, loss=62.363, backward_time=0.776, grad_norm=68.908, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.501e-04, train_time=2.228 +[gpua014:0/64] 2023-07-03 09:01:07,659 (trainer:732) INFO: 7epoch:train:5501-5600batch: iter_time=1.011e-04, forward_time=0.108, loss_ctc=89.550, loss_att=68.562, acc=0.642, loss=74.858, backward_time=0.780, grad_norm=93.115, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.500e-04, train_time=2.317 +[gpua014:0/64] 2023-07-03 09:03:00,602 (trainer:732) INFO: 7epoch:train:5601-5700batch: iter_time=1.080e-04, forward_time=0.108, loss_ctc=82.291, loss_att=65.123, acc=0.662, loss=70.273, backward_time=0.771, grad_norm=79.031, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.499e-04, train_time=2.259 +[gpua014:0/64] 2023-07-03 09:05:32,666 (trainer:732) INFO: 7epoch:train:5701-5800batch: iter_time=1.152e-04, forward_time=0.107, loss_ctc=73.361, loss_att=55.695, acc=0.672, loss=60.995, backward_time=0.856, grad_norm=71.120, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.497e-04, train_time=3.041 +[gpua014:0/64] 2023-07-03 09:07:52,843 (trainer:732) INFO: 7epoch:train:5801-5900batch: iter_time=1.130e-04, forward_time=0.107, loss_ctc=76.767, loss_att=58.968, acc=0.652, loss=64.308, backward_time=0.811, grad_norm=80.975, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.496e-04, train_time=2.803 +[gpua014:0/64] 2023-07-03 09:09:55,021 (trainer:732) INFO: 7epoch:train:5901-6000batch: iter_time=1.111e-04, forward_time=0.107, loss_ctc=90.310, loss_att=66.589, acc=0.657, loss=73.705, backward_time=0.785, grad_norm=83.293, clip=100.000, loss_scale=5.369e+08, optim_step_time=0.113, optim0_lr0=1.495e-04, train_time=2.443 +[gpua014:0/64] 2023-07-03 09:10:15,050 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
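[Editor's note] Every block logs clip=100.000 next to grad_norm, and occasional blocks report grad_norm well above it (169.450 at 7epoch:train:801-900, 136.654 at 7epoch:train:5001-5100). This is the usual max-norm clipping pattern: the logged grad_norm is plausibly the pre-clipping global norm, which is what torch.nn.utils.clip_grad_norm_ returns (an assumption about ESPnet's logging, consistent with the values seen). A self-contained sketch:

```python
import torch
from torch.nn.utils import clip_grad_norm_

model = torch.nn.Linear(8, 8)                 # stand-in model for illustration
loss = model(torch.randn(4, 8)).pow(2).sum()
loss.backward()

# clip_grad_norm_ rescales gradients in place to max_norm and returns the
# total norm *before* clipping -- plausibly the grad_norm reported here.
total_norm = clip_grad_norm_(model.parameters(), max_norm=100.0)
print(float(total_norm))
```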
+[gpua014:0/64] 2023-07-03 09:10:37,177 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 09:10:41,353 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.8", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 09:10:41,353 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.8, +[gpua014:0/64] 2023-07-03 09:10:41,460 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 09:17:33,350 (trainer:732) INFO: 7epoch:train:6001-6100batch: iter_time=2.515, forward_time=0.148, loss_ctc=86.303, loss_att=76.234, acc=0.636, loss=79.255, backward_time=0.767, grad_norm=94.935, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.115, optim0_lr0=1.493e-04, train_time=9.165 +[gpua014:0/64] 2023-07-03 09:19:20,370 (trainer:732) INFO: 7epoch:train:6101-6200batch: iter_time=9.976e-05, forward_time=0.107, loss_ctc=75.093, loss_att=61.126, acc=0.658, loss=65.316, backward_time=0.770, grad_norm=76.421, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.112, optim0_lr0=1.492e-04, train_time=2.142 +[gpua014:0/64] 2023-07-03 09:21:06,445 (trainer:732) INFO: 7epoch:train:6201-6300batch: iter_time=1.019e-04, forward_time=0.107, loss_ctc=92.263, loss_att=64.445, acc=0.670, loss=72.790, backward_time=0.765, grad_norm=114.005, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.112, optim0_lr0=1.491e-04, train_time=2.121 +[gpua014:0/64] 2023-07-03 09:22:46,507 (trainer:732) INFO: 7epoch:train:6301-6400batch: iter_time=1.028e-04, forward_time=0.107, loss_ctc=68.110, loss_att=55.528, acc=0.654, loss=59.302, backward_time=0.750, grad_norm=71.688, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.112, optim0_lr0=1.489e-04, train_time=2.001 +[gpua014:0/64] 2023-07-03 09:24:26,341 (trainer:732) INFO: 7epoch:train:6401-6500batch: iter_time=1.034e-04, forward_time=0.107, loss_ctc=69.962, loss_att=59.255, acc=0.651, loss=62.467, backward_time=0.752, grad_norm=79.622, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.112, optim0_lr0=1.488e-04, train_time=1.996 +[gpua014:0/64] 2023-07-03 09:26:11,745 (trainer:732) INFO: 7epoch:train:6501-6600batch: iter_time=1.045e-04, forward_time=0.107, loss_ctc=89.595, loss_att=68.689, acc=0.632, loss=74.960, backward_time=0.762, grad_norm=92.404, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.112, optim0_lr0=1.487e-04, train_time=2.108 +[gpua014:0/64] 2023-07-03 09:27:57,943 (trainer:732) INFO: 7epoch:train:6601-6700batch: iter_time=1.058e-04, forward_time=0.107, loss_ctc=79.995, loss_att=63.315, acc=0.659, loss=68.319, backward_time=0.758, grad_norm=81.903, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.112, optim0_lr0=1.485e-04, train_time=2.124 +[gpua014:0/64] 2023-07-03 09:30:02,407 (trainer:732) INFO: 7epoch:train:6701-6800batch: iter_time=1.086e-04, forward_time=0.107, loss_ctc=72.661, loss_att=56.214, acc=0.665, loss=61.148, backward_time=0.790, grad_norm=73.510, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.112, optim0_lr0=1.484e-04, 
train_time=2.489 +[gpua014:0/64] 2023-07-03 09:31:55,436 (trainer:732) INFO: 7epoch:train:6801-6900batch: iter_time=1.070e-04, forward_time=0.106, loss_ctc=75.846, loss_att=58.150, acc=0.646, loss=63.459, backward_time=0.790, grad_norm=83.791, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.112, optim0_lr0=1.483e-04, train_time=2.260 +[gpua014:0/64] 2023-07-03 09:33:39,246 (trainer:732) INFO: 7epoch:train:6901-7000batch: iter_time=1.033e-04, forward_time=0.106, loss_ctc=93.412, loss_att=69.559, acc=0.643, loss=76.715, backward_time=0.755, grad_norm=124.091, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.112, optim0_lr0=1.481e-04, train_time=2.076 +[gpua014:0/64] 2023-07-03 09:33:41,064 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpua014:0/64] 2023-07-03 09:34:03,574 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 09:34:07,898 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.4", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 09:34:07,898 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.4, +[gpua014:0/64] 2023-07-03 09:34:07,906 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 09:39:53,899 (trainer:732) INFO: 7epoch:train:7001-7100batch: iter_time=1.661, forward_time=0.165, loss_ctc=85.745, loss_att=74.852, acc=0.638, loss=78.120, backward_time=0.796, grad_norm=89.367, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.116, optim0_lr0=1.480e-04, train_time=7.493 +[gpua014:0/64] 2023-07-03 09:41:34,577 (trainer:732) INFO: 7epoch:train:7101-7200batch: iter_time=9.505e-05, forward_time=0.106, loss_ctc=75.545, loss_att=61.370, acc=0.658, loss=65.622, backward_time=0.754, grad_norm=83.909, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.112, optim0_lr0=1.479e-04, train_time=2.014 +[gpua014:0/64] 2023-07-03 09:43:21,052 (trainer:732) INFO: 7epoch:train:7201-7300batch: iter_time=9.087e-05, forward_time=0.107, loss_ctc=91.013, loss_att=63.631, acc=0.674, loss=71.845, backward_time=0.757, grad_norm=92.755, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.112, optim0_lr0=1.478e-04, train_time=2.129 +[gpua014:0/64] 2023-07-03 09:45:21,916 (trainer:732) INFO: 7epoch:train:7301-7400batch: iter_time=9.542e-05, forward_time=0.106, loss_ctc=66.154, loss_att=54.867, acc=0.655, loss=58.253, backward_time=0.800, grad_norm=71.200, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.112, optim0_lr0=1.476e-04, train_time=2.417 +[gpua014:0/64] 2023-07-03 09:47:10,246 (trainer:732) INFO: 7epoch:train:7401-7500batch: iter_time=9.928e-05, forward_time=0.106, loss_ctc=70.077, loss_att=58.476, acc=0.654, loss=61.956, backward_time=0.760, grad_norm=95.786, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.112, optim0_lr0=1.475e-04, train_time=2.166 +[gpua014:0/64] 2023-07-03 09:48:59,141 (trainer:732) INFO: 7epoch:train:7501-7600batch: iter_time=9.230e-05, forward_time=0.106, loss_ctc=87.055, loss_att=68.038, acc=0.636, loss=73.743, 
backward_time=0.777, grad_norm=89.669, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.112, optim0_lr0=1.474e-04, train_time=2.178 +[gpua014:0/64] 2023-07-03 09:51:08,737 (trainer:732) INFO: 7epoch:train:7601-7700batch: iter_time=9.734e-05, forward_time=0.107, loss_ctc=80.239, loss_att=62.993, acc=0.660, loss=68.167, backward_time=0.804, grad_norm=88.549, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.112, optim0_lr0=1.472e-04, train_time=2.592 +[gpua014:0/64] 2023-07-03 09:53:15,794 (trainer:732) INFO: 7epoch:train:7701-7800batch: iter_time=9.459e-05, forward_time=0.106, loss_ctc=71.141, loss_att=55.078, acc=0.670, loss=59.897, backward_time=0.808, grad_norm=70.481, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.112, optim0_lr0=1.471e-04, train_time=2.541 +[gpua014:0/64] 2023-07-03 09:55:31,351 (trainer:732) INFO: 7epoch:train:7801-7900batch: iter_time=9.697e-05, forward_time=0.106, loss_ctc=75.406, loss_att=58.833, acc=0.648, loss=63.805, backward_time=0.798, grad_norm=87.772, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.112, optim0_lr0=1.470e-04, train_time=2.711 +[gpua014:0/64] 2023-07-03 09:57:28,632 (trainer:732) INFO: 7epoch:train:7901-8000batch: iter_time=8.980e-05, forward_time=0.106, loss_ctc=91.543, loss_att=68.383, acc=0.647, loss=75.331, backward_time=0.779, grad_norm=86.404, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.112, optim0_lr0=1.469e-04, train_time=2.345 +[gpua014:0/64] 2023-07-03 09:57:47,691 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpua014:0/64] 2023-07-03 09:58:09,847 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 09:58:14,125 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.3", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 09:58:14,125 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.3, +[gpua014:0/64] 2023-07-03 09:58:14,262 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 10:03:58,466 (trainer:732) INFO: 7epoch:train:8001-8100batch: iter_time=2.282, forward_time=0.154, loss_ctc=83.365, loss_att=75.432, acc=0.644, loss=77.812, backward_time=0.766, grad_norm=88.530, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.116, optim0_lr0=1.467e-04, train_time=7.796 +[gpua014:0/64] 2023-07-03 10:05:43,265 (trainer:732) INFO: 7epoch:train:8101-8200batch: iter_time=8.830e-05, forward_time=0.107, loss_ctc=74.584, loss_att=62.289, acc=0.666, loss=65.978, backward_time=0.763, grad_norm=79.943, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.113, optim0_lr0=1.466e-04, train_time=2.096 +[gpua014:0/64] 2023-07-03 10:07:29,895 (trainer:732) INFO: 7epoch:train:8201-8300batch: iter_time=8.939e-05, forward_time=0.108, loss_ctc=89.677, loss_att=62.797, acc=0.680, loss=70.861, backward_time=0.761, grad_norm=94.370, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.113, optim0_lr0=1.465e-04, train_time=2.132 +[gpua014:0/64] 2023-07-03 10:09:09,615 (trainer:732) INFO: 
7epoch:train:8301-8400batch: iter_time=8.848e-05, forward_time=0.107, loss_ctc=65.961, loss_att=54.393, acc=0.663, loss=57.863, backward_time=0.750, grad_norm=73.361, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.113, optim0_lr0=1.464e-04, train_time=1.994 +[gpua014:0/64] 2023-07-03 10:10:55,553 (trainer:732) INFO: 7epoch:train:8401-8500batch: iter_time=9.190e-05, forward_time=0.108, loss_ctc=71.379, loss_att=58.830, acc=0.665, loss=62.595, backward_time=0.760, grad_norm=93.320, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.113, optim0_lr0=1.462e-04, train_time=2.119 +[gpua014:0/64] 2023-07-03 10:12:42,291 (trainer:732) INFO: 7epoch:train:8501-8600batch: iter_time=8.775e-05, forward_time=0.107, loss_ctc=86.728, loss_att=68.520, acc=0.644, loss=73.982, backward_time=0.764, grad_norm=91.445, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.113, optim0_lr0=1.461e-04, train_time=2.135 +[gpua014:0/64] 2023-07-03 10:14:31,632 (trainer:732) INFO: 7epoch:train:8601-8700batch: iter_time=8.929e-05, forward_time=0.108, loss_ctc=80.640, loss_att=63.366, acc=0.669, loss=68.548, backward_time=0.762, grad_norm=88.426, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.113, optim0_lr0=1.460e-04, train_time=2.187 +[gpua014:0/64] 2023-07-03 10:16:59,480 (trainer:732) INFO: 7epoch:train:8701-8800batch: iter_time=8.639e-05, forward_time=0.107, loss_ctc=71.500, loss_att=54.790, acc=0.676, loss=59.803, backward_time=0.872, grad_norm=79.042, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.113, optim0_lr0=1.459e-04, train_time=2.957 +[gpua014:0/64] 2023-07-03 10:18:46,729 (trainer:732) INFO: 7epoch:train:8801-8900batch: iter_time=8.369e-05, forward_time=0.107, loss_ctc=76.836, loss_att=60.705, acc=0.649, loss=65.544, backward_time=0.773, grad_norm=84.442, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.113, optim0_lr0=1.457e-04, train_time=2.145 +[gpua014:0/64] 2023-07-03 10:20:29,757 (trainer:732) INFO: 7epoch:train:8901-9000batch: iter_time=8.318e-05, forward_time=0.107, loss_ctc=91.639, loss_att=67.676, acc=0.659, loss=74.865, backward_time=0.753, grad_norm=85.993, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.113, optim0_lr0=1.456e-04, train_time=2.060 +[gpua014:0/64] 2023-07-03 10:20:32,022 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
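[Editor's note] The trainer:732 progress lines are regular enough to mine for plots. A small, self-contained parser; the field order follows the log format, and the sample string is the 7epoch:train:9201-9300 block from this log:

```python
import re

# Extract per-100-batch training stats from trainer:732 lines so the loss/acc
# curves buried in this log can be plotted.
PAT = re.compile(
    r"(\d+)epoch:train:(\d+)-(\d+)batch:"
    r".*?loss_ctc=([\d.]+).*?loss_att=([\d.]+)"
    r".*?acc=([\d.]+).*?loss=([\d.]+)"
    r".*?optim0_lr0=([\deE.+-]+)"
)

def parse_progress(line):
    m = PAT.search(line)
    if m is None:
        return None
    epoch, lo, hi, ctc, att, acc, loss, lr = m.groups()
    return dict(epoch=int(epoch), batches=(int(lo), int(hi)),
                loss_ctc=float(ctc), loss_att=float(att),
                acc=float(acc), loss=float(loss), lr=float(lr))

sample = ("7epoch:train:9201-9300batch: iter_time=1.398e-04, forward_time=0.108, "
          "loss_ctc=90.046, loss_att=61.760, acc=0.686, loss=70.246, "
          "backward_time=0.757, grad_norm=100.195, clip=100.000, "
          "optim_step_time=0.113, optim0_lr0=1.452e-04, train_time=2.091")
print(parse_progress(sample))
```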
+[gpua014:0/64] 2023-07-03 10:20:54,558 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 10:20:58,838 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.5", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 10:20:58,839 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.5, +[gpua014:0/64] 2023-07-03 10:20:58,846 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 10:27:29,156 (trainer:732) INFO: 7epoch:train:9001-9100batch: iter_time=1.703, forward_time=0.178, loss_ctc=85.505, loss_att=72.798, acc=0.653, loss=76.610, backward_time=0.772, grad_norm=104.227, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.118, optim0_lr0=1.455e-04, train_time=8.387 +[gpua014:0/64] 2023-07-03 10:29:09,609 (trainer:732) INFO: 7epoch:train:9101-9200batch: iter_time=1.373e-04, forward_time=0.109, loss_ctc=74.188, loss_att=61.890, acc=0.667, loss=65.580, backward_time=0.754, grad_norm=96.431, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.113, optim0_lr0=1.454e-04, train_time=2.009 +[gpua014:0/64] 2023-07-03 10:30:54,185 (trainer:732) INFO: 7epoch:train:9201-9300batch: iter_time=1.398e-04, forward_time=0.108, loss_ctc=90.046, loss_att=61.760, acc=0.686, loss=70.246, backward_time=0.757, grad_norm=100.195, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.113, optim0_lr0=1.452e-04, train_time=2.091 +[gpua014:0/64] 2023-07-03 10:32:34,258 (trainer:732) INFO: 7epoch:train:9301-9400batch: iter_time=1.367e-04, forward_time=0.109, loss_ctc=64.672, loss_att=52.652, acc=0.671, loss=56.258, backward_time=0.752, grad_norm=84.092, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.113, optim0_lr0=1.451e-04, train_time=2.001 +[gpua014:0/64] 2023-07-03 10:34:14,068 (trainer:732) INFO: 7epoch:train:9401-9500batch: iter_time=1.388e-04, forward_time=0.109, loss_ctc=68.505, loss_att=59.046, acc=0.667, loss=61.884, backward_time=0.750, grad_norm=76.212, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.113, optim0_lr0=1.450e-04, train_time=1.996 +[gpua014:0/64] 2023-07-03 10:36:15,496 (trainer:732) INFO: 7epoch:train:9501-9600batch: iter_time=1.410e-04, forward_time=0.108, loss_ctc=86.461, loss_att=67.255, acc=0.648, loss=73.017, backward_time=0.800, grad_norm=88.972, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.113, optim0_lr0=1.449e-04, train_time=2.428 +[gpua014:0/64] 2023-07-03 10:38:26,787 (trainer:732) INFO: 7epoch:train:9601-9700batch: iter_time=1.310e-04, forward_time=0.110, loss_ctc=80.413, loss_att=63.005, acc=0.669, loss=68.227, backward_time=0.791, grad_norm=82.114, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.113, optim0_lr0=1.448e-04, train_time=2.626 +[gpua014:0/64] 2023-07-03 10:40:30,272 (trainer:732) INFO: 7epoch:train:9701-9800batch: iter_time=1.379e-04, forward_time=0.186, loss_ctc=74.679, loss_att=55.975, acc=0.675, loss=61.586, backward_time=0.792, grad_norm=75.191, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.117, optim0_lr0=1.446e-04, 
train_time=2.469 +[gpua014:0/64] 2023-07-03 10:42:43,755 (trainer:732) INFO: 7epoch:train:9801-9900batch: iter_time=1.366e-04, forward_time=0.124, loss_ctc=74.408, loss_att=58.232, acc=0.656, loss=63.085, backward_time=0.827, grad_norm=75.295, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.114, optim0_lr0=1.445e-04, train_time=2.669 +[gpua014:0/64] 2023-07-03 10:44:38,433 (trainer:732) INFO: 7epoch:train:9901-10000batch: iter_time=1.286e-04, forward_time=0.108, loss_ctc=90.445, loss_att=66.863, acc=0.659, loss=73.938, backward_time=0.778, grad_norm=87.658, clip=100.000, loss_scale=1.074e+09, optim_step_time=0.113, optim0_lr0=1.444e-04, train_time=2.293 +[gpua014:0/64] 2023-07-03 10:57:01,879 (trainer:338) INFO: 7epoch results: [train] iter_time=0.255, forward_time=0.115, loss_ctc=81.029, loss_att=63.896, acc=0.652, loss=69.036, backward_time=0.778, grad_norm=89.792, clip=100.000, loss_scale=6.979e+08, optim_step_time=0.113, optim0_lr0=1.509e-04, train_time=3.021, time=4 hours, 12 minutes and 8.29 seconds, total_count=40000, gpu_max_cached_mem_GB=37.479, [valid] loss_ctc=61.220, cer_ctc=0.331, loss_att=50.294, acc=0.595, cer=0.461, wer=0.999, loss=53.572, time=6 minutes and 6.24 seconds, total_count=4554, gpu_max_cached_mem_GB=37.479, [att_plot] time=5 minutes and 53.25 seconds, total_count=0, gpu_max_cached_mem_GB=37.479 +[gpua014:0/64] 2023-07-03 10:57:17,180 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count +[gpua014:0/64] 2023-07-03 10:57:17,199 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/2epoch.pth +[gpua014:0/64] 2023-07-03 10:57:17,199 (trainer:272) INFO: 8/100epoch started. Estimated time to finish: 2 weeks, 2 days and 12 hours +[gpua014:0/64] 2023-07-03 10:57:17,202 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
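[Editor's note] The finish estimate can be sanity-checked from the epoch summary above: epoch 7 took 4 h 12 m of training plus ~6 m of validation and ~6 m of attention plotting, and 93 of 100 epochs remain.

```python
# Epoch 7 wall time from the summary above: train + valid + att_plot, in hours.
epoch_h = (4 + 12/60 + 8.29/3600) + (6 + 6.24/60) / 60 + (5 + 53.25/60) / 60
print(f"{epoch_h:.2f} h/epoch -> {93 * epoch_h / 24:.1f} days left")
# ~4.40 h/epoch -> ~17.1 days, the same ballpark as the logged estimate of
# "2 weeks, 2 days and 12 hours" (16.5 days), which presumably averages
# over all completed epochs rather than just the slower epoch 7.
```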
+[gpua014:0/64] 2023-07-03 10:57:39,112 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 10:57:43,426 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.5", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 10:57:43,426 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.5, +[gpua014:0/64] 2023-07-03 10:57:43,434 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 11:05:34,810 (trainer:732) INFO: 8epoch:train:1-100batch: iter_time=3.863, forward_time=0.182, loss_ctc=71.953, loss_att=56.579, acc=0.652, loss=61.191, backward_time=0.767, grad_norm=81.674, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.116, optim0_lr0=1.443e-04, train_time=9.952 +[gpua014:0/64] 2023-07-03 11:07:15,061 (trainer:732) INFO: 8epoch:train:101-200batch: iter_time=1.006e-04, forward_time=0.107, loss_ctc=83.688, loss_att=62.700, acc=0.660, loss=68.996, backward_time=0.751, grad_norm=83.521, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.112, optim0_lr0=1.442e-04, train_time=2.005 +[gpua014:0/64] 2023-07-03 11:08:58,814 (trainer:732) INFO: 8epoch:train:201-300batch: iter_time=1.029e-04, forward_time=0.107, loss_ctc=77.702, loss_att=60.845, acc=0.642, loss=65.902, backward_time=0.769, grad_norm=72.915, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.112, optim0_lr0=1.440e-04, train_time=2.075 +[gpua014:0/64] 2023-07-03 11:10:55,797 (trainer:732) INFO: 8epoch:train:301-400batch: iter_time=1.058e-04, forward_time=0.107, loss_ctc=86.552, loss_att=68.588, acc=0.662, loss=73.977, backward_time=0.778, grad_norm=84.683, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.112, optim0_lr0=1.439e-04, train_time=2.339 +[gpua014:0/64] 2023-07-03 11:12:42,225 (trainer:732) INFO: 8epoch:train:401-500batch: iter_time=1.129e-04, forward_time=0.107, loss_ctc=89.365, loss_att=76.265, acc=0.638, loss=80.195, backward_time=0.779, grad_norm=103.391, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.112, optim0_lr0=1.438e-04, train_time=2.128 +[gpua014:0/64] 2023-07-03 11:14:54,649 (trainer:732) INFO: 8epoch:train:501-600batch: iter_time=1.316e-04, forward_time=0.107, loss_ctc=92.306, loss_att=69.410, acc=0.647, loss=76.279, backward_time=0.812, grad_norm=85.465, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.112, optim0_lr0=1.437e-04, train_time=2.648 +[gpua014:0/64] 2023-07-03 11:16:44,481 (trainer:732) INFO: 8epoch:train:601-700batch: iter_time=1.425e-04, forward_time=0.107, loss_ctc=81.930, loss_att=62.838, acc=0.653, loss=68.566, backward_time=0.777, grad_norm=113.254, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.112, optim0_lr0=1.436e-04, train_time=2.196 +[gpua014:0/64] 2023-07-03 11:18:37,508 (trainer:732) INFO: 8epoch:train:701-800batch: iter_time=1.136e-04, forward_time=0.107, loss_ctc=88.066, loss_att=64.322, acc=0.646, loss=71.445, backward_time=0.779, grad_norm=92.199, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.112, optim0_lr0=1.434e-04, train_time=2.260 
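[Editor's note] After each epoch the trainer refreshes the best-model pointers (valid.acc, valid.total_count) and deletes a superseded snapshot (1epoch.pth after epoch 6, 2epoch.pth after epoch 7 above), keeping a bounded window of .pth files. A loose, illustrative sketch of that pruning; the function and keep_last parameter are hypothetical, not ESPnet internals:

```python
from pathlib import Path

def prune_epoch_checkpoints(exp_dir, keep_last=5):
    """Illustrative only: drop all but the newest `keep_last` epoch snapshots.

    ESPnet's real policy also protects whatever the best-model links point at;
    this sketch just mirrors the visible effect (old Nepoch.pth files vanish).
    """
    snaps = sorted(Path(exp_dir).glob("*epoch.pth"),
                   key=lambda p: int(p.stem.removesuffix("epoch")))
    for old in snaps[:-keep_last]:
        old.unlink()
```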
+[gpua014:0/64] 2023-07-03 11:20:24,726 (trainer:732) INFO: 8epoch:train:801-900batch: iter_time=8.943e-05, forward_time=0.107, loss_ctc=93.568, loss_att=68.082, acc=0.659, loss=75.728, backward_time=0.769, grad_norm=140.705, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.433e-04, train_time=2.144 +[gpua014:0/64] 2023-07-03 11:22:22,840 (trainer:732) INFO: 8epoch:train:901-1000batch: iter_time=8.851e-05, forward_time=0.105, loss_ctc=70.937, loss_att=57.486, acc=0.648, loss=61.522, backward_time=0.786, grad_norm=72.406, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.432e-04, train_time=2.362 +[gpua014:0/64] 2023-07-03 11:22:34,179 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpua014:0/64] 2023-07-03 11:22:55,942 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 11:23:00,214 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.1", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 11:23:00,214 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.1, +[gpua014:0/64] 2023-07-03 11:23:00,230 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 11:30:44,366 (trainer:732) INFO: 8epoch:train:1001-1100batch: iter_time=1.738, forward_time=0.177, loss_ctc=71.293, loss_att=55.648, acc=0.657, loss=60.341, backward_time=0.841, grad_norm=81.246, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.116, optim0_lr0=1.431e-04, train_time=10.029 +[gpua014:0/64] 2023-07-03 11:32:40,908 (trainer:732) INFO: 8epoch:train:1101-1200batch: iter_time=9.558e-05, forward_time=0.106, loss_ctc=83.943, loss_att=63.374, acc=0.659, loss=69.545, backward_time=0.790, grad_norm=79.844, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.112, optim0_lr0=1.430e-04, train_time=2.332 +[gpua014:0/64] 2023-07-03 11:34:35,694 (trainer:732) INFO: 8epoch:train:1201-1300batch: iter_time=8.459e-05, forward_time=0.107, loss_ctc=76.391, loss_att=59.918, acc=0.645, loss=64.860, backward_time=0.781, grad_norm=73.294, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.112, optim0_lr0=1.429e-04, train_time=2.296 +[gpua014:0/64] 2023-07-03 11:36:35,790 (trainer:732) INFO: 8epoch:train:1301-1400batch: iter_time=8.933e-05, forward_time=0.106, loss_ctc=85.561, loss_att=66.344, acc=0.667, loss=72.109, backward_time=0.802, grad_norm=87.421, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.112, optim0_lr0=1.427e-04, train_time=2.402 +[gpua014:0/64] 2023-07-03 11:38:49,290 (trainer:732) INFO: 8epoch:train:1401-1500batch: iter_time=9.077e-05, forward_time=0.107, loss_ctc=86.480, loss_att=76.600, acc=0.641, loss=79.564, backward_time=0.825, grad_norm=92.828, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.112, optim0_lr0=1.426e-04, train_time=2.670 +[gpua014:0/64] 2023-07-03 11:40:48,063 (trainer:732) INFO: 8epoch:train:1501-1600batch: iter_time=9.566e-05, forward_time=0.107, loss_ctc=89.979, loss_att=67.303, acc=0.653, loss=74.105, backward_time=0.798, 
grad_norm=91.494, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.425e-04, train_time=2.375 +[gpua014:0/64] 2023-07-03 11:42:50,035 (trainer:732) INFO: 8epoch:train:1601-1700batch: iter_time=9.295e-05, forward_time=0.107, loss_ctc=80.658, loss_att=61.124, acc=0.663, loss=66.984, backward_time=0.812, grad_norm=85.053, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.424e-04, train_time=2.439 +[gpua014:0/64] 2023-07-03 11:44:46,238 (trainer:732) INFO: 8epoch:train:1701-1800batch: iter_time=1.007e-04, forward_time=0.106, loss_ctc=86.900, loss_att=63.733, acc=0.650, loss=70.683, backward_time=0.787, grad_norm=98.100, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.423e-04, train_time=2.324 +[gpua014:0/64] 2023-07-03 11:47:06,824 (trainer:732) INFO: 8epoch:train:1801-1900batch: iter_time=1.073e-04, forward_time=0.107, loss_ctc=92.292, loss_att=68.604, acc=0.657, loss=75.710, backward_time=0.836, grad_norm=99.245, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.422e-04, train_time=2.811 +[gpua014:0/64] 2023-07-03 11:49:44,334 (trainer:732) INFO: 8epoch:train:1901-2000batch: iter_time=1.030e-04, forward_time=0.107, loss_ctc=71.578, loss_att=57.196, acc=0.649, loss=61.510, backward_time=0.863, grad_norm=79.403, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.420e-04, train_time=3.150 +[gpua014:0/64] 2023-07-03 11:50:04,363 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpua014:0/64] 2023-07-03 11:50:26,890 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 11:50:31,243 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.3", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 11:50:31,244 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.3, +[gpua014:0/64] 2023-07-03 11:50:31,292 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 11:59:59,939 (trainer:732) INFO: 8epoch:train:2001-2100batch: iter_time=2.825, forward_time=0.200, loss_ctc=70.365, loss_att=54.572, acc=0.663, loss=59.310, backward_time=1.038, grad_norm=86.690, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.118, optim0_lr0=1.419e-04, train_time=12.312 +[gpua014:0/64] 2023-07-03 12:02:53,285 (trainer:732) INFO: 8epoch:train:2101-2200batch: iter_time=1.095e-04, forward_time=0.108, loss_ctc=83.391, loss_att=61.753, acc=0.666, loss=68.245, backward_time=0.883, grad_norm=77.824, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.418e-04, train_time=3.467 +[gpua014:0/64] 2023-07-03 12:05:41,833 (trainer:732) INFO: 8epoch:train:2201-2300batch: iter_time=1.132e-04, forward_time=0.108, loss_ctc=77.306, loss_att=60.610, acc=0.645, loss=65.619, backward_time=0.933, grad_norm=81.733, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.417e-04, train_time=3.371 +[gpua014:0/64] 2023-07-03 12:08:26,398 (trainer:732) INFO: 8epoch:train:2301-2400batch: 
iter_time=1.175e-04, forward_time=0.109, loss_ctc=83.620, loss_att=65.137, acc=0.672, loss=70.682, backward_time=0.857, grad_norm=94.295, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.416e-04, train_time=3.291 +[gpua014:0/64] 2023-07-03 12:10:59,777 (trainer:732) INFO: 8epoch:train:2401-2500batch: iter_time=1.165e-04, forward_time=0.109, loss_ctc=85.837, loss_att=73.558, acc=0.649, loss=77.242, backward_time=0.813, grad_norm=88.674, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.415e-04, train_time=3.067 +[gpua014:0/64] 2023-07-03 12:14:24,808 (trainer:732) INFO: 8epoch:train:2501-2600batch: iter_time=1.174e-04, forward_time=0.108, loss_ctc=91.106, loss_att=68.368, acc=0.650, loss=75.189, backward_time=0.981, grad_norm=90.440, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.414e-04, train_time=4.100 +[gpua014:0/64] 2023-07-03 12:17:14,627 (trainer:732) INFO: 8epoch:train:2601-2700batch: iter_time=1.268e-04, forward_time=0.108, loss_ctc=79.435, loss_att=61.102, acc=0.664, loss=66.602, backward_time=0.862, grad_norm=90.710, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.412e-04, train_time=3.396 +[gpua014:0/64] 2023-07-03 12:19:50,904 (trainer:732) INFO: 8epoch:train:2701-2800batch: iter_time=1.226e-04, forward_time=0.108, loss_ctc=85.706, loss_att=61.989, acc=0.655, loss=69.104, backward_time=0.803, grad_norm=90.672, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.411e-04, train_time=3.125 +[gpua014:0/64] 2023-07-03 12:22:45,720 (trainer:732) INFO: 8epoch:train:2801-2900batch: iter_time=1.332e-04, forward_time=0.108, loss_ctc=89.745, loss_att=67.272, acc=0.664, loss=74.014, backward_time=1.049, grad_norm=96.450, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.410e-04, train_time=3.496 +[gpua014:0/64] 2023-07-03 12:25:04,044 (trainer:732) INFO: 8epoch:train:2901-3000batch: iter_time=1.130e-04, forward_time=0.107, loss_ctc=72.146, loss_att=56.681, acc=0.656, loss=61.320, backward_time=0.809, grad_norm=81.054, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.409e-04, train_time=2.766 +[gpua014:0/64] 2023-07-03 12:25:21,806 (multiple_iter_factory:32) INFO: Building 3th iter-factory... 
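[Editor's note] The batch-sampler summaries repeated around each iter-factory build give a handle on corpus size: every split reports N-batch=45593 at batch_size=128 (mean 128.0, max 129), so under the assumption of ten equal splits:

```python
# Back-of-envelope corpus size from the UnsortedBatchSampler summaries above.
n_batches, batch_size, n_splits = 45593, 128, 10
per_split = n_batches * batch_size
print(f"{per_split:,} utterances/split -> ~{per_split * n_splits / 1e6:.0f}M total")
# -> 5,835,904 utterances/split -> ~58M total (a slight undercount, given the
#    occasional 129-sample batch; equal split sizes assumed, as the logs suggest).
```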
+[gpua014:0/64] 2023-07-03 12:25:43,932 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 12:25:48,257 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.9", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 12:25:48,257 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.9, +[gpua014:0/64] 2023-07-03 12:25:48,265 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 12:32:34,384 (trainer:732) INFO: 8epoch:train:3001-3100batch: iter_time=2.673, forward_time=0.160, loss_ctc=70.145, loss_att=55.103, acc=0.667, loss=59.616, backward_time=0.826, grad_norm=76.517, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.115, optim0_lr0=1.408e-04, train_time=9.006 +[gpua014:0/64] 2023-07-03 12:34:29,875 (trainer:732) INFO: 8epoch:train:3101-3200batch: iter_time=1.030e-04, forward_time=0.107, loss_ctc=83.797, loss_att=61.450, acc=0.667, loss=68.154, backward_time=0.777, grad_norm=90.037, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.407e-04, train_time=2.310 +[gpua014:0/64] 2023-07-03 12:36:26,930 (trainer:732) INFO: 8epoch:train:3201-3300batch: iter_time=1.020e-04, forward_time=0.107, loss_ctc=76.116, loss_att=59.328, acc=0.654, loss=64.365, backward_time=0.772, grad_norm=66.043, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.406e-04, train_time=2.341 +[gpua014:0/64] 2023-07-03 12:38:25,290 (trainer:732) INFO: 8epoch:train:3301-3400batch: iter_time=1.018e-04, forward_time=0.107, loss_ctc=84.464, loss_att=66.292, acc=0.673, loss=71.743, backward_time=0.773, grad_norm=78.714, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.405e-04, train_time=2.367 +[gpua014:0/64] 2023-07-03 12:40:38,648 (trainer:732) INFO: 8epoch:train:3401-3500batch: iter_time=9.895e-05, forward_time=0.107, loss_ctc=84.560, loss_att=73.887, acc=0.649, loss=77.089, backward_time=0.834, grad_norm=113.670, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.404e-04, train_time=2.667 +[gpua014:0/64] 2023-07-03 12:43:02,556 (trainer:732) INFO: 8epoch:train:3501-3600batch: iter_time=1.014e-04, forward_time=0.107, loss_ctc=89.255, loss_att=67.307, acc=0.654, loss=73.892, backward_time=0.852, grad_norm=93.040, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.402e-04, train_time=2.878 +[gpua014:0/64] 2023-07-03 12:45:13,037 (trainer:732) INFO: 8epoch:train:3601-3700batch: iter_time=1.024e-04, forward_time=0.107, loss_ctc=78.034, loss_att=59.867, acc=0.668, loss=65.317, backward_time=0.803, grad_norm=85.093, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.401e-04, train_time=2.609 +[gpua014:0/64] 2023-07-03 12:47:27,416 (trainer:732) INFO: 8epoch:train:3701-3800batch: iter_time=1.049e-04, forward_time=0.106, loss_ctc=86.547, loss_att=62.217, acc=0.654, loss=69.516, backward_time=0.795, grad_norm=88.246, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.400e-04, 
train_time=2.687 +[gpua014:0/64] 2023-07-03 12:50:02,668 (trainer:732) INFO: 8epoch:train:3801-3900batch: iter_time=1.054e-04, forward_time=0.107, loss_ctc=88.651, loss_att=65.454, acc=0.666, loss=72.413, backward_time=0.862, grad_norm=97.799, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.399e-04, train_time=3.105 +[gpua014:0/64] 2023-07-03 12:52:27,817 (trainer:732) INFO: 8epoch:train:3901-4000batch: iter_time=9.448e-05, forward_time=0.106, loss_ctc=71.610, loss_att=57.257, acc=0.655, loss=61.563, backward_time=0.826, grad_norm=77.210, clip=100.000, loss_scale=2.147e+09, optim_step_time=0.113, optim0_lr0=1.398e-04, train_time=2.903 +[gpua014:0/64] 2023-07-03 12:52:47,846 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpua014:0/64] 2023-07-03 12:53:10,281 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 12:53:14,749 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.7", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 12:53:14,749 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.7, +[gpua014:0/64] 2023-07-03 12:53:14,756 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 12:59:57,708 (trainer:732) INFO: 8epoch:train:4001-4100batch: iter_time=2.331, forward_time=0.155, loss_ctc=69.145, loss_att=54.149, acc=0.667, loss=58.648, backward_time=0.796, grad_norm=74.360, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.124, optim0_lr0=1.397e-04, train_time=8.997 +[gpua014:0/64] 2023-07-03 13:01:39,587 (trainer:732) INFO: 8epoch:train:4101-4200batch: iter_time=1.109e-04, forward_time=0.108, loss_ctc=81.363, loss_att=60.356, acc=0.676, loss=66.658, backward_time=0.754, grad_norm=76.531, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.112, optim0_lr0=1.396e-04, train_time=2.038 +[gpua014:0/64] 2023-07-03 13:03:28,592 (trainer:732) INFO: 8epoch:train:4201-4300batch: iter_time=9.674e-05, forward_time=0.107, loss_ctc=76.566, loss_att=59.580, acc=0.652, loss=64.676, backward_time=0.768, grad_norm=72.014, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.112, optim0_lr0=1.395e-04, train_time=2.180 +[gpua014:0/64] 2023-07-03 13:05:21,031 (trainer:732) INFO: 8epoch:train:4301-4400batch: iter_time=1.004e-04, forward_time=0.107, loss_ctc=84.096, loss_att=64.758, acc=0.676, loss=70.560, backward_time=0.778, grad_norm=87.357, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.112, optim0_lr0=1.394e-04, train_time=2.249 +[gpua014:0/64] 2023-07-03 13:07:31,694 (trainer:732) INFO: 8epoch:train:4401-4500batch: iter_time=1.007e-04, forward_time=0.106, loss_ctc=86.125, loss_att=74.133, acc=0.649, loss=77.730, backward_time=0.816, grad_norm=107.166, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.112, optim0_lr0=1.393e-04, train_time=2.613 +[gpua014:0/64] 2023-07-03 13:09:27,083 (trainer:732) INFO: 8epoch:train:4501-4600batch: iter_time=1.043e-04, forward_time=0.107, loss_ctc=89.580, loss_att=66.935, acc=0.653, loss=73.728, 
backward_time=0.774, grad_norm=107.438, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.112, optim0_lr0=1.392e-04, train_time=2.308 +[gpua014:0/64] 2023-07-03 13:11:25,430 (trainer:732) INFO: 8epoch:train:4601-4700batch: iter_time=9.916e-05, forward_time=0.107, loss_ctc=78.839, loss_att=59.185, acc=0.673, loss=65.081, backward_time=0.782, grad_norm=81.277, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.112, optim0_lr0=1.390e-04, train_time=2.367 +[gpua014:0/64] 2023-07-03 13:13:46,101 (trainer:732) INFO: 8epoch:train:4701-4800batch: iter_time=1.108e-04, forward_time=0.107, loss_ctc=84.629, loss_att=62.561, acc=0.654, loss=69.181, backward_time=0.817, grad_norm=88.020, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.112, optim0_lr0=1.389e-04, train_time=2.813 +[gpua014:0/64] 2023-07-03 13:15:46,588 (trainer:732) INFO: 8epoch:train:4801-4900batch: iter_time=9.762e-05, forward_time=0.106, loss_ctc=90.902, loss_att=66.494, acc=0.666, loss=73.816, backward_time=0.784, grad_norm=109.251, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.112, optim0_lr0=1.388e-04, train_time=2.410 +[gpua014:0/64] 2023-07-03 13:17:48,653 (trainer:732) INFO: 8epoch:train:4901-5000batch: iter_time=9.738e-05, forward_time=0.106, loss_ctc=71.522, loss_att=56.646, acc=0.657, loss=61.109, backward_time=0.805, grad_norm=74.553, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.112, optim0_lr0=1.387e-04, train_time=2.441 +[gpua014:0/64] 2023-07-03 13:17:51,304 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpua014:0/64] 2023-07-03 13:18:13,656 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 13:18:17,899 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.8", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 13:18:17,899 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.8, +[gpua014:0/64] 2023-07-03 13:18:17,910 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 13:24:27,929 (trainer:732) INFO: 8epoch:train:5001-5100batch: iter_time=2.555, forward_time=0.149, loss_ctc=68.880, loss_att=54.004, acc=0.665, loss=58.467, backward_time=0.791, grad_norm=75.203, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.117, optim0_lr0=1.386e-04, train_time=7.985 +[gpua014:0/64] 2023-07-03 13:26:26,669 (trainer:732) INFO: 8epoch:train:5101-5200batch: iter_time=9.325e-05, forward_time=0.106, loss_ctc=82.288, loss_att=62.900, acc=0.654, loss=68.717, backward_time=0.788, grad_norm=86.630, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.113, optim0_lr0=1.385e-04, train_time=2.375 +[gpua014:0/64] 2023-07-03 13:28:25,293 (trainer:732) INFO: 8epoch:train:5201-5300batch: iter_time=9.276e-05, forward_time=0.106, loss_ctc=75.183, loss_att=59.997, acc=0.645, loss=64.553, backward_time=0.779, grad_norm=71.010, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.113, optim0_lr0=1.384e-04, train_time=2.372 +[gpua014:0/64] 2023-07-03 13:30:40,953 (trainer:732) INFO: 
8epoch:train:5301-5400batch: iter_time=9.294e-05, forward_time=0.106, loss_ctc=85.428, loss_att=67.675, acc=0.665, loss=73.001, backward_time=0.797, grad_norm=84.101, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.112, optim0_lr0=1.383e-04, train_time=2.713 +[gpua014:0/64] 2023-07-03 13:32:52,185 (trainer:732) INFO: 8epoch:train:5401-5500batch: iter_time=9.385e-05, forward_time=0.106, loss_ctc=84.065, loss_att=75.164, acc=0.635, loss=77.834, backward_time=0.812, grad_norm=103.559, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.113, optim0_lr0=1.382e-04, train_time=2.624 +[gpua014:0/64] 2023-07-03 13:34:44,837 (trainer:732) INFO: 8epoch:train:5501-5600batch: iter_time=9.046e-05, forward_time=0.107, loss_ctc=89.001, loss_att=67.994, acc=0.647, loss=74.296, backward_time=0.771, grad_norm=82.224, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.112, optim0_lr0=1.381e-04, train_time=2.253 +[gpua014:0/64] 2023-07-03 13:36:42,385 (trainer:732) INFO: 8epoch:train:5601-5700batch: iter_time=9.856e-05, forward_time=0.106, loss_ctc=77.241, loss_att=59.045, acc=0.664, loss=64.504, backward_time=0.768, grad_norm=81.509, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.112, optim0_lr0=1.380e-04, train_time=2.351 +[gpua014:0/64] 2023-07-03 13:38:54,042 (trainer:732) INFO: 8epoch:train:5701-5800batch: iter_time=9.087e-05, forward_time=0.106, loss_ctc=84.116, loss_att=63.222, acc=0.644, loss=69.490, backward_time=0.817, grad_norm=97.988, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.113, optim0_lr0=1.379e-04, train_time=2.633 +[gpua014:0/64] 2023-07-03 13:40:49,774 (trainer:732) INFO: 8epoch:train:5801-5900batch: iter_time=1.069e-04, forward_time=0.107, loss_ctc=88.392, loss_att=65.440, acc=0.670, loss=72.326, backward_time=0.783, grad_norm=98.106, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.112, optim0_lr0=1.378e-04, train_time=2.314 +[gpua014:0/64] 2023-07-03 13:43:19,996 (trainer:732) INFO: 8epoch:train:5901-6000batch: iter_time=4.370e-04, forward_time=0.121, loss_ctc=69.786, loss_att=55.824, acc=0.658, loss=60.012, backward_time=0.846, grad_norm=74.843, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.113, optim0_lr0=1.377e-04, train_time=3.004 +[gpua014:0/64] 2023-07-03 13:43:40,024 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
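Note the loss_scale column: it holds at 2.147e+09 (about 2**31) through the early records, doubles to 4.295e+09 (2**32) at batch 4001-4100 above, and doubles again to 8.590e+09 (2**33) later in the epoch. Doubling at fixed intervals is the signature of dynamic loss scaling for mixed-precision training: the scale grows after a streak of overflow-free optimizer steps and is backed off when an overflow is detected. The trainer's actual scaler is not shown in this log; the sketch below uses the torch.cuda.amp.GradScaler default constants as assumptions.

# Sketch of dynamic loss-scale bookkeeping. The constants follow the
# torch.cuda.amp.GradScaler defaults; the scaler actually used by this
# trainer is not visible in the log.
class LossScale:
    def __init__(self, init_scale=2.0**16, growth_factor=2.0,
                 backoff_factor=0.5, growth_interval=2000):
        self.scale = init_scale
        self.growth_factor = growth_factor
        self.backoff_factor = backoff_factor
        self.growth_interval = growth_interval
        self._good_steps = 0

    def update(self, found_inf):
        if found_inf:
            # Overflow: shrink the scale and restart the streak.
            self.scale *= self.backoff_factor
            self._good_steps = 0
        else:
            self._good_steps += 1
            if self._good_steps == self.growth_interval:
                # A full streak of finite gradients: double the scale.
                self.scale *= self.growth_factor
                self._good_steps = 0

s = LossScale(init_scale=2.0**31)
for _ in range(2000):
    s.update(found_inf=False)
print(s.scale)  # -> 4294967296.0, the 4.295e+09 seen above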
+[gpua014:0/64] 2023-07-03 13:44:02,407 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 13:44:06,645 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.4", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 13:44:06,645 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.4, +[gpua014:0/64] 2023-07-03 13:44:06,653 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 13:51:46,776 (trainer:732) INFO: 8epoch:train:6001-6100batch: iter_time=2.333, forward_time=0.142, loss_ctc=70.714, loss_att=54.097, acc=0.664, loss=59.082, backward_time=0.783, grad_norm=101.489, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.115, optim0_lr0=1.376e-04, train_time=10.135 +[gpua014:0/64] 2023-07-03 13:53:27,717 (trainer:732) INFO: 8epoch:train:6101-6200batch: iter_time=1.108e-04, forward_time=0.108, loss_ctc=82.574, loss_att=61.216, acc=0.660, loss=67.623, backward_time=0.754, grad_norm=77.311, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.113, optim0_lr0=1.375e-04, train_time=2.019 +[gpua014:0/64] 2023-07-03 13:55:07,590 (trainer:732) INFO: 8epoch:train:6201-6300batch: iter_time=1.078e-04, forward_time=0.108, loss_ctc=75.131, loss_att=58.601, acc=0.651, loss=63.560, backward_time=0.752, grad_norm=72.031, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.113, optim0_lr0=1.374e-04, train_time=1.997 +[gpua014:0/64] 2023-07-03 13:56:48,280 (trainer:732) INFO: 8epoch:train:6301-6400batch: iter_time=1.166e-04, forward_time=0.108, loss_ctc=82.936, loss_att=66.241, acc=0.668, loss=71.249, backward_time=0.753, grad_norm=81.953, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.113, optim0_lr0=1.373e-04, train_time=2.014 +[gpua014:0/64] 2023-07-03 13:58:33,889 (trainer:732) INFO: 8epoch:train:6401-6500batch: iter_time=1.188e-04, forward_time=0.107, loss_ctc=82.837, loss_att=73.861, acc=0.644, loss=76.554, backward_time=0.767, grad_norm=88.376, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.113, optim0_lr0=1.372e-04, train_time=2.112 +[gpua014:0/64] 2023-07-03 14:00:45,197 (trainer:732) INFO: 8epoch:train:6501-6600batch: iter_time=1.385e-04, forward_time=0.118, loss_ctc=88.465, loss_att=66.303, acc=0.651, loss=72.951, backward_time=0.813, grad_norm=94.601, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.113, optim0_lr0=1.370e-04, train_time=2.626 +[gpua014:0/64] 2023-07-03 14:02:48,614 (trainer:732) INFO: 8epoch:train:6601-6700batch: iter_time=1.137e-04, forward_time=0.107, loss_ctc=78.364, loss_att=59.506, acc=0.661, loss=65.163, backward_time=0.775, grad_norm=84.035, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.113, optim0_lr0=1.369e-04, train_time=2.468 +[gpua014:0/64] 2023-07-03 14:04:39,248 (trainer:732) INFO: 8epoch:train:6701-6800batch: iter_time=1.123e-04, forward_time=0.107, loss_ctc=86.589, loss_att=62.422, acc=0.648, loss=69.672, backward_time=0.774, grad_norm=104.287, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.112, optim0_lr0=1.368e-04, 
train_time=2.212 +[gpua014:0/64] 2023-07-03 14:06:28,935 (trainer:732) INFO: 8epoch:train:6801-6900batch: iter_time=1.166e-04, forward_time=0.108, loss_ctc=87.795, loss_att=64.863, acc=0.674, loss=71.743, backward_time=0.765, grad_norm=90.585, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.112, optim0_lr0=1.367e-04, train_time=2.194 +[gpua014:0/64] 2023-07-03 14:08:15,899 (trainer:732) INFO: 8epoch:train:6901-7000batch: iter_time=3.409e-04, forward_time=0.115, loss_ctc=69.167, loss_att=55.059, acc=0.659, loss=59.291, backward_time=0.767, grad_norm=74.421, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.113, optim0_lr0=1.366e-04, train_time=2.139 +[gpua014:0/64] 2023-07-03 14:08:34,844 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpua014:0/64] 2023-07-03 14:08:57,090 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 14:09:01,412 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.0", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 14:09:01,412 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.0, +[gpua014:0/64] 2023-07-03 14:09:01,459 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 14:16:37,830 (trainer:732) INFO: 8epoch:train:7001-7100batch: iter_time=2.183, forward_time=0.156, loss_ctc=69.770, loss_att=53.693, acc=0.665, loss=58.516, backward_time=0.776, grad_norm=78.422, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.114, optim0_lr0=1.365e-04, train_time=10.038 +[gpua014:0/64] 2023-07-03 14:18:22,215 (trainer:732) INFO: 8epoch:train:7101-7200batch: iter_time=8.191e-05, forward_time=0.107, loss_ctc=79.880, loss_att=59.668, acc=0.665, loss=65.731, backward_time=0.763, grad_norm=82.036, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.112, optim0_lr0=1.364e-04, train_time=2.088 +[gpua014:0/64] 2023-07-03 14:20:03,765 (trainer:732) INFO: 8epoch:train:7201-7300batch: iter_time=9.511e-05, forward_time=0.107, loss_ctc=76.077, loss_att=58.461, acc=0.651, loss=63.746, backward_time=0.754, grad_norm=74.942, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.112, optim0_lr0=1.363e-04, train_time=2.031 +[gpua014:0/64] 2023-07-03 14:21:47,371 (trainer:732) INFO: 8epoch:train:7301-7400batch: iter_time=8.692e-05, forward_time=0.107, loss_ctc=83.341, loss_att=65.556, acc=0.666, loss=70.892, backward_time=0.761, grad_norm=90.425, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.112, optim0_lr0=1.362e-04, train_time=2.072 +[gpua014:0/64] 2023-07-03 14:23:40,249 (trainer:732) INFO: 8epoch:train:7401-7500batch: iter_time=1.086e-04, forward_time=0.107, loss_ctc=82.464, loss_att=71.751, acc=0.648, loss=74.965, backward_time=0.760, grad_norm=83.705, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.112, optim0_lr0=1.361e-04, train_time=2.257 +[gpua014:0/64] 2023-07-03 14:25:21,284 (trainer:732) INFO: 8epoch:train:7501-7600batch: iter_time=9.175e-05, forward_time=0.106, loss_ctc=85.298, loss_att=65.408, acc=0.654, loss=71.375, 
backward_time=0.755, grad_norm=87.441, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.112, optim0_lr0=1.360e-04, train_time=2.020 +[gpua014:0/64] 2023-07-03 14:27:14,374 (trainer:732) INFO: 8epoch:train:7601-7700batch: iter_time=8.964e-05, forward_time=0.107, loss_ctc=78.233, loss_att=59.465, acc=0.664, loss=65.095, backward_time=0.792, grad_norm=94.598, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.112, optim0_lr0=1.359e-04, train_time=2.262 +[gpua014:0/64] 2023-07-03 14:29:08,048 (trainer:732) INFO: 8epoch:train:7701-7800batch: iter_time=8.873e-05, forward_time=0.106, loss_ctc=85.613, loss_att=62.031, acc=0.648, loss=69.106, backward_time=0.772, grad_norm=102.067, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.112, optim0_lr0=1.358e-04, train_time=2.273 +[gpua014:0/64] 2023-07-03 14:31:11,252 (trainer:732) INFO: 8epoch:train:7801-7900batch: iter_time=7.179e-04, forward_time=0.117, loss_ctc=87.400, loss_att=64.545, acc=0.674, loss=71.401, backward_time=0.792, grad_norm=90.161, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.113, optim0_lr0=1.357e-04, train_time=2.464 +[gpua014:0/64] 2023-07-03 14:33:10,144 (trainer:732) INFO: 8epoch:train:7901-8000batch: iter_time=8.795e-05, forward_time=0.106, loss_ctc=71.367, loss_att=55.731, acc=0.661, loss=60.421, backward_time=0.768, grad_norm=76.860, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.113, optim0_lr0=1.356e-04, train_time=2.378 +[gpua014:0/64] 2023-07-03 14:33:25,369 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpua014:0/64] 2023-07-03 14:33:47,771 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 14:33:52,129 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.2", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 14:33:52,129 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.2, +[gpua014:0/64] 2023-07-03 14:33:52,137 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 14:40:38,201 (trainer:732) INFO: 8epoch:train:8001-8100batch: iter_time=2.484, forward_time=0.149, loss_ctc=68.712, loss_att=53.165, acc=0.668, loss=57.829, backward_time=0.764, grad_norm=80.528, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.115, optim0_lr0=1.355e-04, train_time=8.961 +[gpua014:0/64] 2023-07-03 14:42:18,408 (trainer:732) INFO: 8epoch:train:8101-8200batch: iter_time=1.097e-04, forward_time=0.107, loss_ctc=81.164, loss_att=60.410, acc=0.665, loss=66.637, backward_time=0.751, grad_norm=85.474, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.354e-04, train_time=2.004 +[gpua014:0/64] 2023-07-03 14:43:58,521 (trainer:732) INFO: 8epoch:train:8201-8300batch: iter_time=1.187e-04, forward_time=0.107, loss_ctc=74.739, loss_att=57.756, acc=0.652, loss=62.851, backward_time=0.752, grad_norm=82.426, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.353e-04, train_time=2.002 +[gpua014:0/64] 2023-07-03 14:45:38,433 (trainer:732) INFO: 
8epoch:train:8301-8400batch: iter_time=1.158e-04, forward_time=0.107, loss_ctc=81.923, loss_att=64.584, acc=0.670, loss=69.786, backward_time=0.751, grad_norm=91.957, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.112, optim0_lr0=1.352e-04, train_time=1.998 +[gpua014:0/64] 2023-07-03 14:47:18,296 (trainer:732) INFO: 8epoch:train:8401-8500batch: iter_time=1.168e-04, forward_time=0.107, loss_ctc=84.705, loss_att=73.219, acc=0.647, loss=76.665, backward_time=0.752, grad_norm=100.698, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.112, optim0_lr0=1.351e-04, train_time=1.997 +[gpua014:0/64] 2023-07-03 14:49:00,052 (trainer:732) INFO: 8epoch:train:8501-8600batch: iter_time=1.157e-04, forward_time=0.107, loss_ctc=88.293, loss_att=65.188, acc=0.655, loss=72.120, backward_time=0.753, grad_norm=96.122, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.112, optim0_lr0=1.350e-04, train_time=2.035 +[gpua014:0/64] 2023-07-03 14:50:43,788 (trainer:732) INFO: 8epoch:train:8601-8700batch: iter_time=1.141e-04, forward_time=0.107, loss_ctc=75.649, loss_att=58.224, acc=0.669, loss=63.452, backward_time=0.752, grad_norm=75.790, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.349e-04, train_time=2.075 +[gpua014:0/64] 2023-07-03 14:52:39,154 (trainer:732) INFO: 8epoch:train:8701-8800batch: iter_time=1.143e-04, forward_time=0.107, loss_ctc=85.753, loss_att=61.870, acc=0.652, loss=69.035, backward_time=0.771, grad_norm=99.146, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.112, optim0_lr0=1.348e-04, train_time=2.307 +[gpua014:0/64] 2023-07-03 14:54:24,402 (trainer:732) INFO: 8epoch:train:8801-8900batch: iter_time=1.138e-04, forward_time=0.107, loss_ctc=85.164, loss_att=62.906, acc=0.678, loss=69.584, backward_time=0.776, grad_norm=90.026, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.112, optim0_lr0=1.347e-04, train_time=2.105 +[gpua014:0/64] 2023-07-03 14:56:04,184 (trainer:732) INFO: 8epoch:train:8901-9000batch: iter_time=1.083e-04, forward_time=0.107, loss_ctc=69.672, loss_att=55.112, acc=0.661, loss=59.480, backward_time=0.751, grad_norm=69.355, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.346e-04, train_time=1.995 +[gpua014:0/64] 2023-07-03 14:56:22,103 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
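Every iter-factory above reports the same mini-batch summary: N-batch=45593, batch_size=128, mean=128.0, min=128, max=129. That is what fixed-size batching with remainder folding produces: the key file is cut into batches of 128 and the short tail is spread one utterance at a time over the trailing batches, leaving a few batches of 129 instead of one undersized final batch. ESPnet's UnsortedBatchSampler itself is not reproduced in this log, so the following only sketches the arithmetic, with a hypothetical remainder of 40:

# Illustrative arithmetic only; not ESPnet's UnsortedBatchSampler.
def make_batches(keys, batch_size=128):
    n_full = len(keys) // batch_size
    batches = [keys[i * batch_size:(i + 1) * batch_size] for i in range(n_full)]
    # Fold the short remainder into the trailing batches, one key each.
    for j, key in enumerate(keys[n_full * batch_size:]):
        batches[-(j + 1)].append(key)
    return batches

keys = list(range(45593 * 128 + 40))  # 40 is a hypothetical remainder
sizes = [len(b) for b in make_batches(keys)]
print(len(sizes), min(sizes), max(sizes), round(sum(sizes) / len(sizes), 1))
# -> 45593 128 129 128.0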
+[gpua014:0/64] 2023-07-03 14:56:44,408 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 14:56:48,755 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.6", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 14:56:48,755 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.6, +[gpua014:0/64] 2023-07-03 14:56:48,762 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 15:03:08,701 (trainer:732) INFO: 8epoch:train:9001-9100batch: iter_time=2.056, forward_time=0.167, loss_ctc=68.726, loss_att=53.032, acc=0.667, loss=57.740, backward_time=0.782, grad_norm=78.336, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.115, optim0_lr0=1.345e-04, train_time=8.490 +[gpua014:0/64] 2023-07-03 15:04:53,568 (trainer:732) INFO: 8epoch:train:9101-9200batch: iter_time=1.171e-04, forward_time=0.108, loss_ctc=80.650, loss_att=59.603, acc=0.669, loss=65.917, backward_time=0.757, grad_norm=89.252, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.344e-04, train_time=2.097 +[gpua014:0/64] 2023-07-03 15:06:37,221 (trainer:732) INFO: 8epoch:train:9201-9300batch: iter_time=1.141e-04, forward_time=0.108, loss_ctc=74.127, loss_att=58.026, acc=0.653, loss=62.857, backward_time=0.755, grad_norm=67.694, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.343e-04, train_time=2.073 +[gpua014:0/64] 2023-07-03 15:08:26,273 (trainer:732) INFO: 8epoch:train:9301-9400batch: iter_time=1.149e-04, forward_time=0.107, loss_ctc=83.723, loss_att=66.524, acc=0.670, loss=71.684, backward_time=0.766, grad_norm=82.671, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.343e-04, train_time=2.181 +[gpua014:0/64] 2023-07-03 15:10:15,946 (trainer:732) INFO: 8epoch:train:9401-9500batch: iter_time=1.135e-04, forward_time=0.107, loss_ctc=82.613, loss_att=72.767, acc=0.646, loss=75.721, backward_time=0.759, grad_norm=92.276, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.342e-04, train_time=2.193 +[gpua014:0/64] 2023-07-03 15:12:19,663 (trainer:732) INFO: 8epoch:train:9501-9600batch: iter_time=1.157e-04, forward_time=0.107, loss_ctc=87.132, loss_att=64.934, acc=0.655, loss=71.593, backward_time=0.785, grad_norm=88.736, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.341e-04, train_time=2.474 +[gpua014:0/64] 2023-07-03 15:14:13,005 (trainer:732) INFO: 8epoch:train:9601-9700batch: iter_time=1.217e-04, forward_time=0.107, loss_ctc=75.814, loss_att=57.986, acc=0.668, loss=63.334, backward_time=0.770, grad_norm=81.673, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.340e-04, train_time=2.267 +[gpua014:0/64] 2023-07-03 15:16:01,610 (trainer:732) INFO: 8epoch:train:9701-9800batch: iter_time=1.059e-04, forward_time=0.108, loss_ctc=83.144, loss_att=61.650, acc=0.651, loss=68.098, backward_time=0.762, grad_norm=89.447, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.339e-04, 
train_time=2.172 +[gpua014:0/64] 2023-07-03 15:18:01,769 (trainer:732) INFO: 8epoch:train:9801-9900batch: iter_time=1.216e-04, forward_time=0.107, loss_ctc=84.532, loss_att=63.464, acc=0.678, loss=69.784, backward_time=0.780, grad_norm=92.656, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.338e-04, train_time=2.403 +[gpua014:0/64] 2023-07-03 15:19:56,478 (trainer:732) INFO: 8epoch:train:9901-10000batch: iter_time=1.031e-04, forward_time=0.107, loss_ctc=69.278, loss_att=54.766, acc=0.663, loss=59.119, backward_time=0.765, grad_norm=70.410, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.337e-04, train_time=2.294 +[gpua014:0/64] 2023-07-03 15:32:08,521 (trainer:338) INFO: 8epoch results: [train] iter_time=0.251, forward_time=0.113, loss_ctc=80.997, loss_att=62.659, acc=0.658, loss=68.160, backward_time=0.795, grad_norm=87.106, clip=100.000, loss_scale=4.295e+09, optim_step_time=0.113, optim0_lr0=1.388e-04, train_time=3.152, time=4 hours, 22 minutes and 59.51 seconds, total_count=50000, gpu_max_cached_mem_GB=37.479, [valid] loss_ctc=57.660, cer_ctc=0.322, loss_att=48.662, acc=0.601, cer=0.466, wer=0.999, loss=51.361, time=5 minutes and 58.26 seconds, total_count=5566, gpu_max_cached_mem_GB=37.479, [att_plot] time=5 minutes and 53.54 seconds, total_count=0, gpu_max_cached_mem_GB=37.479 +[gpua014:0/64] 2023-07-03 15:32:28,085 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count +[gpua014:0/64] 2023-07-03 15:32:28,089 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/3epoch.pth +[gpua014:0/64] 2023-07-03 15:32:28,106 (trainer:272) INFO: 9/100epoch started. Estimated time to finish: 2 weeks, 2 days and 18 hours +[gpua014:0/64] 2023-07-03 15:32:29,330 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
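Across epoch 8, optim0_lr0 decays smoothly from 1.425e-04 down to 1.337e-04. The experiment name encodes lr2.5e-4 and warmup10k, which points at the standard warmup-then-inverse-square-root schedule, lr(step) = peak_lr * min(step / warmup_steps, sqrt(warmup_steps / step)). The optimizer section of the config is not included in this excerpt, so the sketch below only illustrates that shape, with the peak and warmup taken from the run name:

import math

# Warmup + inverse-square-root decay implied by "lr2.5e-4_warmup10k".
# The actual scheduler config is not shown in this log.
def lr_at(step, peak_lr=2.5e-4, warmup_steps=10_000):
    if step < warmup_steps:
        return peak_lr * step / warmup_steps          # linear warmup
    return peak_lr * math.sqrt(warmup_steps / step)   # ~ step**-0.5 decay

for step in (5_000, 10_000, 20_000, 40_000):
    print(step, f"{lr_at(step):.3e}")
# 5000  1.250e-04  (mid-warmup)
# 10000 2.500e-04  (peak)
# 20000 1.768e-04
# 40000 1.250e-04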
+[gpua014:0/64] 2023-07-03 15:32:51,326 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 15:32:57,543 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.3", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 15:32:57,543 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.3, +[gpua014:0/64] 2023-07-03 15:32:57,612 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 15:40:23,595 (trainer:732) INFO: 9epoch:train:1-100batch: iter_time=3.646, forward_time=0.167, loss_ctc=89.655, loss_att=65.568, acc=0.669, loss=72.794, backward_time=0.771, grad_norm=91.383, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.116, optim0_lr0=1.336e-04, train_time=9.496 +[gpua014:0/64] 2023-07-03 15:42:03,884 (trainer:732) INFO: 9epoch:train:101-200batch: iter_time=1.075e-04, forward_time=0.108, loss_ctc=76.077, loss_att=58.833, acc=0.645, loss=64.006, backward_time=0.754, grad_norm=92.730, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.335e-04, train_time=2.006 +[gpua014:0/64] 2023-07-03 15:43:43,905 (trainer:732) INFO: 9epoch:train:201-300batch: iter_time=1.263e-04, forward_time=0.107, loss_ctc=82.337, loss_att=66.100, acc=0.666, loss=70.971, backward_time=0.752, grad_norm=91.902, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.334e-04, train_time=2.000 +[gpua014:0/64] 2023-07-03 15:45:23,589 (trainer:732) INFO: 9epoch:train:301-400batch: iter_time=1.044e-04, forward_time=0.107, loss_ctc=69.665, loss_att=53.623, acc=0.654, loss=58.435, backward_time=0.751, grad_norm=86.718, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.333e-04, train_time=1.993 +[gpua014:0/64] 2023-07-03 15:47:05,530 (trainer:732) INFO: 9epoch:train:401-500batch: iter_time=1.123e-04, forward_time=0.107, loss_ctc=82.780, loss_att=67.200, acc=0.651, loss=71.874, backward_time=0.752, grad_norm=90.931, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.332e-04, train_time=2.039 +[gpua014:0/64] 2023-07-03 15:48:47,893 (trainer:732) INFO: 9epoch:train:501-600batch: iter_time=1.460e-04, forward_time=0.106, loss_ctc=76.113, loss_att=63.506, acc=0.647, loss=67.288, backward_time=0.755, grad_norm=95.358, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.331e-04, train_time=2.047 +[gpua014:0/64] 2023-07-03 15:50:36,432 (trainer:732) INFO: 9epoch:train:601-700batch: iter_time=1.492e-04, forward_time=0.107, loss_ctc=68.422, loss_att=50.873, acc=0.670, loss=56.138, backward_time=0.757, grad_norm=84.612, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.330e-04, train_time=2.171 +[gpua014:0/64] 2023-07-03 15:52:37,725 (trainer:732) INFO: 9epoch:train:701-800batch: iter_time=0.009, forward_time=0.205, loss_ctc=85.934, loss_att=70.542, acc=0.659, loss=75.160, backward_time=0.796, grad_norm=101.509, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.122, optim0_lr0=1.329e-04, train_time=2.425 
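Each record also logs clip=100.000 beside grad_norm: gradients are rescaled whenever their global L2 norm exceeds 100, and the grad_norm values above hover just below or occasionally above that threshold (the logged value is the pre-clipping norm). In PyTorch this is a single call; a minimal sketch, assuming only the max_norm=100 visible in the log:

import torch

# Global-norm gradient clipping at the threshold logged as clip=100.000.
model = torch.nn.Linear(8, 8)
loss = model(torch.randn(4, 8)).pow(2).sum()
loss.backward()
# clip_grad_norm_ returns the pre-clipping global norm -- the quantity
# that appears in the log as grad_norm.
total_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=100.0)
print(float(total_norm))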
+[gpua014:0/64] 2023-07-03 15:54:21,489 (trainer:732) INFO: 9epoch:train:801-900batch: iter_time=1.404e-04, forward_time=0.108, loss_ctc=83.452, loss_att=62.453, acc=0.673, loss=68.753, backward_time=0.754, grad_norm=74.119, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.328e-04, train_time=2.075 +[gpua014:0/64] 2023-07-03 15:56:18,047 (trainer:732) INFO: 9epoch:train:901-1000batch: iter_time=1.278e-04, forward_time=0.107, loss_ctc=82.426, loss_att=59.341, acc=0.672, loss=66.266, backward_time=0.778, grad_norm=99.810, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.112, optim0_lr0=1.327e-04, train_time=2.331 +[gpua014:0/64] 2023-07-03 15:56:37,176 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpua014:0/64] 2023-07-03 15:56:59,004 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua014:0/64] 2023-07-03 15:57:03,191 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.4", "type": "text"} + preprocess: ) +[gpua014:0/64] 2023-07-03 15:57:03,191 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.4, +[gpua014:0/64] 2023-07-03 15:57:03,201 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129 +[gpua014:0/64] 2023-07-03 16:02:03,355 (trainer:732) INFO: 9epoch:train:1001-1100batch: iter_time=2.192, forward_time=0.178, loss_ctc=89.263, loss_att=65.838, acc=0.657, loss=72.866, backward_time=0.778, grad_norm=91.355, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.116, optim0_lr0=1.326e-04, train_time=6.905 +[gpua014:0/64] 2023-07-03 16:04:10,925 (trainer:732) INFO: 9epoch:train:1101-1200batch: iter_time=9.290e-05, forward_time=0.106, loss_ctc=76.178, loss_att=57.534, acc=0.641, loss=63.127, backward_time=0.780, grad_norm=81.265, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.325e-04, train_time=2.552 +[gpua014:0/64] 2023-07-03 16:06:00,839 (trainer:732) INFO: 9epoch:train:1201-1300batch: iter_time=1.035e-04, forward_time=0.106, loss_ctc=82.000, loss_att=64.829, acc=0.662, loss=69.980, backward_time=0.787, grad_norm=87.498, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.325e-04, train_time=2.198 +[gpua014:0/64] 2023-07-03 16:08:01,489 (trainer:732) INFO: 9epoch:train:1301-1400batch: iter_time=9.972e-05, forward_time=0.105, loss_ctc=68.486, loss_att=51.841, acc=0.652, loss=56.835, backward_time=0.791, grad_norm=84.985, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.324e-04, train_time=2.413 +[gpua014:0/64] 2023-07-03 16:09:53,617 (trainer:732) INFO: 9epoch:train:1401-1500batch: iter_time=1.087e-04, forward_time=0.106, loss_ctc=81.114, loss_att=65.917, acc=0.653, loss=70.476, backward_time=0.768, grad_norm=83.242, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.323e-04, train_time=2.242 +[gpua014:0/64] 2023-07-03 16:11:44,620 (trainer:732) INFO: 9epoch:train:1501-1600batch: iter_time=9.726e-05, forward_time=0.106, loss_ctc=74.640, loss_att=63.516, acc=0.648, loss=66.853, backward_time=0.767, 
grad_norm=85.450, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.113, optim0_lr0=1.322e-04, train_time=2.220 +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/kaldiio/utils.py:481: UserWarning: An error happens at loading "dump/raw/org/ru_open_stt_train/data/format.60/data_wav.ark:982143989" + warnings.warn('An error happens at loading "{}"'.format(ark_name)) +ERROR:root:Error happened with path=exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.4, type=kaldi_ark, id=ru_open_stt_public_youtube700_6ee68828cbe6b0f_000000000_000004400_rus_asr +Process SpawnProcess-4: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 510, in train_one_epoch + for iiter, (utt_id, batch) in enumerate( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/reporter.py", line 267, in measure_iter_time + retval = next(iterator) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/iterators/multiple_iter_factory.py", line 35, in build_iter + yield from iter_factory.build_iter(epoch, shuffle) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 628, in __next__ + data = self._next_data() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1333, in _next_data + return self._process_data(data) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1359, in _process_data + data.reraise() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_utils.py", line 543, in reraise + raise exception +PermissionError: Caught PermissionError in DataLoader worker process 0. 
+Original Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/utils/data/_utils/worker.py", line 302, in _worker_loop + data = fetcher.fetch(index) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 58, in fetch + data = [self.dataset[idx] for idx in possibly_batched_index] + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 58, in <listcomp> + data = [self.dataset[idx] for idx in possibly_batched_index] + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/dataset.py", line 513, in __getitem__ + value = loader[uid] + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/dataset.py", line 52, in __getitem__ + retval = self.loader[key] + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/kaldiio/utils.py", line 479, in __getitem__ + return self._loader(ark_name) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/kaldiio/matio.py", line 235, in load_mat + fd_dict[ark] = open_like_kaldi(ark, "rb") + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/kaldiio/utils.py", line 207, in open_like_kaldi + return io.open(name, mode, encoding=encoding) +PermissionError: [Errno 13] Permission denied: 'dump/raw/org/ru_open_stt_train/data/format.60/data_wav.ark' + +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main + return _run_code(code, main_globals, None, + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code + exec(code, run_globals) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module> + main() + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main + S2TTask.main(cmd=cmd) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main + while not ProcessContext(processes, error_queues).join(): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join + raise ProcessExitedException( +torch.multiprocessing.spawn.ProcessExitedException: process 3 terminated with exit code 1 +srun: error: gpua068: task 11: Exited with exit code 1 +gpua018:3479290:3479374 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpua096:2182301:2182391 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpua096:2182300:2182393 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpua014:1504762:1504842 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpua014:1504764:1504840 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpua014:1504763:1504839 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpua041:2383595:2383739 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpua096:2182300:2182300 [2] NCCL INFO comm 0x50397010 rank 62 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua062:3999118:3999195 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpua091:1092313:1092414
[2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpua091:1092314:1092412 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpua018:3479289:3479375 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpua093:1851602:1851693 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpua014:1504763:1504763 [2] NCCL INFO comm 0x8d97bae0 rank 2 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua041:2383594:2383742 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpua041:2383593:2383741 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpua041:2383592:2383740 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpua060:2765423:2765503 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpua060:2765421:2765505 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +[W ProcessGroupNCCL.cpp:948] [Rank 26] Found key in store: NCCLABORTEDCOMM:20ab17ac1c17e000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 15. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 27] Found key in store: NCCLABORTEDCOMM:20ab17ac1c17e000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 15. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 24] Found key in store: NCCLABORTEDCOMM:20ab17ac1c17e000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 15. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 25] Found key in store: NCCLABORTEDCOMM:20ab17ac1c17e000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 15. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +gpua041:2383594:2383594 [2] NCCL INFO comm 0xb8aa2570 rank 30 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +[W ProcessGroupNCCL.cpp:948] [Rank 30] Found key in store: NCCLABORTEDCOMM:20ab17ac1c17e000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 15. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +gpua041:2383593:2383593 [1] NCCL INFO comm 0x4f62e590 rank 29 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +[W ProcessGroupNCCL.cpp:948] [Rank 29] Found key in store: NCCLABORTEDCOMM:20ab17ac1c17e000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 15. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 28] Found key in store: NCCLABORTEDCOMM:20ab17ac1c17e000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 15. 
This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +gpua041:2383595:2383595 [3] NCCL INFO comm 0x50d6c2c0 rank 31 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpua020:3382569:3382650 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +[W ProcessGroupNCCL.cpp:948] [Rank 31] Found key in store: NCCLABORTEDCOMM:20ab17ac1c17e000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 15. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 20] Found key in store: NCCLABORTEDCOMM:20ab17ac1c17e000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 15. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 23] Found key in store: NCCLABORTEDCOMM:20ab17ac1c17e000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 15. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 22] Found key in store: NCCLABORTEDCOMM:20ab17ac1c17e000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 15. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 21] Found key in store: NCCLABORTEDCOMM:20ab17ac1c17e000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 15. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 16] Found key in store: NCCLABORTEDCOMM:20ab17ac1c17e000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 15. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 18] Found key in store: NCCLABORTEDCOMM:20ab17ac1c17e000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 15. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 17] Found key in store: NCCLABORTEDCOMM:20ab17ac1c17e000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 15. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 19] Found key in store: NCCLABORTEDCOMM:20ab17ac1c17e000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 15. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +gpua063:1316628:1316714 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpua022:3213422:3213505 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpua022:3213423:3213502 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpua022:3213424:3213503 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpua016:2146307:2146402 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpua021:3546923:3547008 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpua018:3479289:3479289 [2] NCCL INFO comm 0x50f23dd0 rank 14 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua021:3546922:3547011 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpua091:1092313:1092313 [2] NCCL INFO comm 0xb9112ed0 rank 54 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua018:3479290:3479290 [3] NCCL INFO comm 0xb9d17510 rank 15 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpua014:1504764:1504764 [3] NCCL INFO comm 0x90a5180 rank 3 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpua041:2383592:2383592 [0] NCCL INFO comm 0x500f1b90 rank 28 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpua060:2765421:2765421 [1] NCCL INFO comm 0xf88a8d0 rank 33 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpua022:3213423:3213423 [2] NCCL INFO comm 0xb5a97440 rank 26 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua093:1851602:1851602 [2] NCCL INFO comm 0x8eeaee30 rank 58 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua020:3382569:3382569 [3] NCCL INFO comm 0x50adbf90 rank 19 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpua093:1851603:1851603 [3] NCCL INFO comm 0x51d23280 rank 59 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpua022:3213422:3213422 [1] NCCL INFO comm 0xba8d1d30 rank 25 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpua096:2182299:2182392 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpua096:2182301:2182301 [3] NCCL INFO comm 0xb7ff8a70 rank 63 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpua014:1504762:1504762 [1] NCCL INFO comm 0xa6220c0 rank 1 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpua060:2765423:2765423 [3] NCCL INFO comm 0x8bb0f9c0 rank 35 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpua015:2678599:2678682 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpua015:2678601:2678679 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpua016:2146307:2146307 [3] NCCL INFO comm 0xb69ce0b0 rank 11 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpua088:4022852:4022942 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpua022:3213424:3213424 [3] NCCL INFO comm 0x4eeb3510 rank 27 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpua020:3382568:3382651 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpua021:3546921:3546921 [1] NCCL INFO comm 0xb47c80d0 rank 21 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpua062:3999117:3999117 [2] NCCL INFO comm 0x5126ca20 rank 38 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua093:1851601:1851695 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpua062:3999118:3999118 [3] NCCL INFO comm 0x4f6c8ad0 rank 39 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpua015:2678600:2678600 [2] NCCL INFO comm 0x4fb6eec0 rank 6 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpua063:1316626:1316715 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpua063:1316627:1316713 [2] NCCL INFO 
[Service thread] Connection closed by localRank 2
+gpua060:2765422:2765422 [2] NCCL INFO comm 0x50dc0f50 rank 34 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpua021:3546923:3546923 [3] NCCL INFO comm 0x51142940 rank 23 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpua096:2182299:2182299 [1] NCCL INFO comm 0x50cf5510 rank 61 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpua062:3999116:3999116 [1] NCCL INFO comm 0x8302c90 rank 37 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpua091:1092314:1092314 [3] NCCL INFO comm 0x508531a0 rank 55 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpua021:3546922:3546922 [2] NCCL INFO comm 0xb64560d0 rank 22 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpua093:1851601:1851601 [1] NCCL INFO comm 0x8d2d4770 rank 57 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpua091:1092312:1092312 [1] NCCL INFO comm 0xb15c78d0 rank 53 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpua018:3479288:3479288 [1] NCCL INFO comm 0x5176eca0 rank 13 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpua063:1316626:1316626 [1] NCCL INFO comm 0x50a76db0 rank 41 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpua088:4022851:4022851 [2] NCCL INFO comm 0x8b447690 rank 50 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpua088:4022852:4022852 [3] NCCL INFO comm 0xb0a25610 rank 51 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpua015:2678601:2678601 [3] NCCL INFO comm 0x510fc310 rank 7 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpua063:1316628:1316628 [3] NCCL INFO comm 0x50f53420 rank 43 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpua016:2146306:2146306 [2] NCCL INFO comm 0xb80c42d0 rank 10 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpua016:2146305:2146305 [1] NCCL INFO comm 0x505b7da0 rank 9 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpua020:3382568:3382568 [2] NCCL INFO comm 0x4f345750 rank 18 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpua015:2678599:2678599 [1] NCCL INFO comm 0xb6725330 rank 5 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpua063:1316627:1316627 [2] NCCL INFO comm 0x50e1a4d0 rank 42 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpua020:3382567:3382567 [1] NCCL INFO comm 0x50206940 rank 17 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpua088:4022849:4022849 [0] NCCL INFO comm 0x8e555600 rank 48 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+gpua015:2678598:2678598 [0] NCCL INFO comm 0x5031b3d0 rank 4 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+gpua021:3546920:3546920 [0] NCCL INFO comm 0x91f0590 rank 20 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+gpua022:3213421:3213421 [0] NCCL INFO comm 0x5127a110 rank 24 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+gpua091:1092311:1092311 [0] NCCL INFO comm 0xb51c2df0 rank 52 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+gpua062:3999115:3999115 [0] NCCL INFO comm 0x4f5279e0 rank 36 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+Process SpawnProcess-2:
+gpua096:2182298:2182298 [0] NCCL INFO comm 0x504ff500 rank 60 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+gpua020:3382566:3382566 [0] NCCL INFO comm 0x4ff54b70 rank 16 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+gpua088:4022850:4022850 [1] NCCL INFO comm 0xa543f510 rank 49 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpua014:1504761:1504761 [0] NCCL INFO comm 0x4fbe12c0 rank 0 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+gpua018:3479287:3479287 [0] NCCL INFO comm 0xa20beed0 rank 12 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+gpua016:2146304:2146304 [0] NCCL INFO comm 0x94649e0 rank 8 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 53] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800146 milliseconds before timing out.
+gpua093:1851600:1851600 [0] NCCL INFO comm 0x9c356d50 rank 56 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 15] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800066 milliseconds before timing out.
+gpua063:1316625:1316625 [0] NCCL INFO comm 0x50020800 rank 40 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+Process SpawnProcess-4:
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 55] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800096 milliseconds before timing out.
+Process SpawnProcess-4:
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 59] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800088 milliseconds before timing out.
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 3] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800061 milliseconds before timing out.
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 29] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800091 milliseconds before timing out.
+gpua060:2765420:2765420 [0] NCCL INFO comm 0x8ee9a7d0 rank 32 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 30] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800089 milliseconds before timing out.
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 63] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800063 milliseconds before timing out.
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 61] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800143 milliseconds before timing out.
+Process SpawnProcess-1:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 28] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800093 milliseconds before timing out.
+Process SpawnProcess-4:
+Traceback (most recent call last):
+Process SpawnProcess-3:
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 31] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800090 milliseconds before timing out.
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 14] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800090 milliseconds before timing out.
+Process SpawnProcess-3:
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 62] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800062 milliseconds before timing out.
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 1] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800053 milliseconds before timing out.
+Process SpawnProcess-3:
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 2] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800062 milliseconds before timing out.
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 26] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800091 milliseconds before timing out.
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 13] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800154 milliseconds before timing out.
+Process SpawnProcess-1:
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 48] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800306 milliseconds before timing out.
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 10] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800153 milliseconds before timing out.
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 33] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800109 milliseconds before timing out.
+Process SpawnProcess-3:
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 54] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800085 milliseconds before timing out.
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 57] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800178 milliseconds before timing out.
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 58] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800098 milliseconds before timing out.
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 34] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800108 milliseconds before timing out.
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 35] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800113 milliseconds before timing out.
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 50] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800150 milliseconds before timing out.
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 6] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800156 milliseconds before timing out.
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 25] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800108 milliseconds before timing out.
+Process SpawnProcess-2:
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 37] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800180 milliseconds before timing out.
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 21] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800119 milliseconds before timing out.
+Process SpawnProcess-2:
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 17] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800253 milliseconds before timing out.
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 42] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800177 milliseconds before timing out.
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 43] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800115 milliseconds before timing out.
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 51] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800167 milliseconds before timing out.
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 9] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800211 milliseconds before timing out.
+Process SpawnProcess-1:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 4] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800640 milliseconds before timing out.
+Process SpawnProcess-1:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 60] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800891 milliseconds before timing out.
+Process SpawnProcess-1:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 0] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800910 milliseconds before timing out.
+Process SpawnProcess-2:
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 5] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800162 milliseconds before timing out.
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 41] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800171 milliseconds before timing out.
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 38] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800084 milliseconds before timing out.
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 11] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800103 milliseconds before timing out.
+Process SpawnProcess-4:
+Process SpawnProcess-1:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 19] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800119 milliseconds before timing out.
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 52] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800908 milliseconds before timing out.
+Process SpawnProcess-4:
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 27] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800101 milliseconds before timing out.
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 39] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800092 milliseconds before timing out.
+Process SpawnProcess-1:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 36] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800908 milliseconds before timing out.
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 18] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800163 milliseconds before timing out.
+Process SpawnProcess-4:
+RuntimeError: [Rank 23] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800112 milliseconds before timing out.
+Process SpawnProcess-1:
+RuntimeError: [Rank 16] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800894 milliseconds before timing out.
+Process SpawnProcess-4:
+Process SpawnProcess-1:
+Process SpawnProcess-2:
+RuntimeError: [Rank 49] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800969 milliseconds before timing out.
+RuntimeError: [Rank 7] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800165 milliseconds before timing out.
+RuntimeError: [Rank 12] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800914 milliseconds before timing out.
+Process SpawnProcess-1:
+RuntimeError: [Rank 40] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1801183 milliseconds before timing out.
+Process SpawnProcess-1:
+Process SpawnProcess-3:
+RuntimeError: [Rank 8] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800956 milliseconds before timing out.
+RuntimeError: [Rank 22] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800114 milliseconds before timing out.
+Process SpawnProcess-1:
+Process SpawnProcess-1:
+RuntimeError: [Rank 24] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800865 milliseconds before timing out.
+RuntimeError: [Rank 56] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1801088 milliseconds before timing out.
+Process SpawnProcess-1:
+RuntimeError: [Rank 20] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800797 milliseconds before timing out.
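Every collective-timeout traceback in this log points at the same call site, espnet2/train/trainer.py line 516, where each rank joins an all_reduce over an iterator_stop flag so that all ranks leave the epoch together. If even one rank stalls or dies before reaching that call, every other rank blocks inside the collective until the watchdog fires at Timeout(ms)=1800000, which matches the 30-minute "ran for 180xxxx milliseconds" pattern in these errors. Below is a minimal, self-contained sketch of that synchronization idiom, not the ESPnet code itself; it uses the CPU gloo backend with 2 processes so it runs anywhere, whereas this job used NCCL with 4 GPUs on each of many nodes.

# Minimal sketch (assumed/simplified, not the ESPnet trainer) of the
# all_reduce(iterator_stop) pattern that times out in the log above.
import os
import torch
import torch.distributed as dist
import torch.multiprocessing as mp

def train_one_epoch(rank: int, steps_on_this_rank: int) -> None:
    # One flag tensor per rank; after the SUM-reduce, a value > 0 means
    # "at least one rank ran out of data", so everyone stops together.
    iterator_stop = torch.tensor(0)
    for step in range(10):
        if step >= steps_on_this_rank:
            iterator_stop.fill_(1)  # this rank's iterator is exhausted
        # Every rank must reach this line each step; a single stalled rank
        # leaves the others blocked here until the backend's timeout fires.
        dist.all_reduce(iterator_stop, dist.ReduceOp.SUM)
        if iterator_stop > 0:
            break
        # ... forward/backward/optimizer step would go here ...
    print(f"rank {rank} stopped after step {step}")

def worker(rank: int, world_size: int) -> None:
    os.environ["MASTER_ADDR"] = "127.0.0.1"
    os.environ["MASTER_PORT"] = "29500"
    # gloo keeps the sketch runnable on CPU; the job above used NCCL.
    dist.init_process_group("gloo", rank=rank, world_size=world_size)
    # Uneven data: rank 0 has 3 steps, rank 1 has 10; both stop after 3.
    train_one_epoch(rank, steps_on_this_rank=3 if rank == 0 else 10)
    dist.destroy_process_group()

if __name__ == "__main__":
    mp.spawn(worker, args=(2,), nprocs=2)

Run as a plain script, both ranks stop after rank 0's three steps; that is the cooperative path. The timeouts above are the uncooperative path, where some rank never reaches the all_reduce at all.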
+Process SpawnProcess-1:
+RuntimeError: [Rank 32] Caught collective operation timeout: WorkNCCL(SeqNum=3130418, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1801293 milliseconds before timing out.
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 0 terminated with exit code 1
+srun: error: gpua014: task 0: Exited with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 3 terminated with exit code 1
+srun: error: gpua091: task 13: Exited with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+srun: error: gpua063: task 10: Exited with exit code 1
+srun: error: gpua060: task 8: Exited with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 0 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 3 terminated with exit code 1
+srun: error: gpua062: task 9: Exited with exit code 1
+srun: error: gpua041: task 7: Exited with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 0 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 0 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 0 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 0 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+srun: error: gpua096: task 15: Exited with exit code 1
+srun: error: gpua022: task 6: Exited with exit code 1
+srun: error: gpua016: task 2: Exited with exit code 1
+srun: error: gpua015: task 1: Exited with exit code 1
+srun: error: gpua021: task 5: Exited with exit code 1
+srun: error: gpua018: task 3: Exited with exit code 1
+srun: error: gpua093: task 14: Exited with exit code 1
+srun: error: gpua020: task 4: Exited with exit code 1
+srun: error: gpua088: task 12: Exited with exit code 1
+# Accounting: begin_time=1688368916
+# Accounting: end_time=1688420633
+# Accounting: time=51717 threads=1
+# Finished at Mon Jul 3 16:43:53 CDT 2023 with status 1
diff --git a/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.12.log b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.12.log
new file mode 100644
index 0000000000000000000000000000000000000000..d0bc4d19d3abe7c316fc8f96e3c1598cd5bb4663
--- /dev/null
+++ b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.12.log
@@ -0,0 +1,4556 @@
+# Running on gpub001.delta.ncsa.illinois.edu
+# Started at Sun Jul 2 01:35:09 CDT 2023
+# SLURMD_NODENAME=gpub001
+# SLURM_CLUSTER_NAME=delta
+# SLURM_CONF=/var/spool/slurmd/conf-cache/slurm.conf
+# SLURM_CPUS_ON_NODE=64
+# SLURM_CPUS_PER_TASK=64
+# SLURM_EXPORT_ENV=PATH
+# SLURM_GET_USER_ENV=1
+# SLURM_GPUS_ON_NODE=4
+# SLURM_GTIDS=0
+# SLURM_JOBID=2115302
+# SLURM_JOB_ACCOUNT=bbjs-delta-gpu
+# SLURM_JOB_CPUS_PER_NODE='64(x32)'
+# SLURM_JOB_GID=202
+# SLURM_JOB_GPUS=0,1,2,3
+# SLURM_JOB_ID=2115302
+# SLURM_JOB_NAME=exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.log
+# SLURM_JOB_NODELIST='gpub[001,009,011-016,031-032,035,037-041,058-059,061,064-068,075,080,083,085,088-091]'
+# SLURM_JOB_NUM_NODES=32
+# SLURM_JOB_PARTITION=gpuA40x4
+# SLURM_JOB_QOS=bbjs-delta-gpu
+# SLURM_JOB_UID=68077
+# SLURM_JOB_USER=peng6
+# SLURM_LOCALID=0
+# SLURM_MEM_PER_NODE=240000
+# SLURM_NNODES=32
+# SLURM_NODEID=0
+# SLURM_NODELIST='gpub[001,009,011-016,031-032,035,037-041,058-059,061,064-068,075,080,083,085,088-091]'
+# SLURM_NODE_ALIASES='(null)'
+# SLURM_OPEN_MODE=a
+# SLURM_PRIO_PROCESS=0
+# SLURM_PROCID=0
+# SLURM_SUBMIT_DIR=/scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1
+# SLURM_SUBMIT_HOST=dt-login02.delta.internal.ncsa.edu
+# SLURM_TASKS_PER_NODE='1(x32)'
+# SLURM_TASK_PID=279842
+# SLURM_TOPOLOGY_ADDR=ss00.ss09.gpub001
+# SLURM_TOPOLOGY_ADDR_PATTERN=switch.switch.node
+# SLURM_WORKING_CLUSTER=delta:dt-sched:6817:9728:109
+# srun --export=ALL python3 -m espnet2.bin.s2t_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75
+/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75
exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text 
--valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file 
exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file 
exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text 
--valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe 
--fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file 
exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file 
exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +ibuted true --dist_launcher slurm --dist_init_method 
file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 
--multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 
--train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +d_length 150 --train_data_path_and_name_and_type 
exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_75cd0950-5140-438e-8876-03978c5bec75 +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type 
+[gpub001:0/128] 2023-07-02 01:39:06,588 (distributed_c10d:319) INFO: Added key: store_based_barrier_key:1 to store for rank: 0
+[gpub001:0/128] 2023-07-02 01:39:08,180 (distributed_c10d:353) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 128 nodes.
+[gpub001:0/128] 2023-07-02 01:39:08,214 (s2t:483) INFO: Vocabulary size: 50002
+[gpub001:0/128] 2023-07-02 01:39:28,889 (abs_task:1201) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True
+[gpub001:0/128] 2023-07-02 01:39:28,898 (abs_task:1202) INFO: Model structure:
+ESPnetS2TModel(
+  (frontend): DefaultFrontend(
+    (stft): Stft(n_fft=512, win_length=400, hop_length=160, center=True, normalized=False, onesided=True)
+    (frontend): Frontend()
+    (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False)
+  )
+  (specaug): SpecAug(
+    (freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq)
+    (time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=10, axis=time)
+  )
+  (normalize): GlobalMVN(stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz, norm_means=True, norm_vars=True)
+  (encoder): TransformerEncoder(
+    (embed): Conv2dSubsampling(
+      (conv): Sequential(
+        (0): Conv2d(1, 1024, kernel_size=(3, 3), stride=(2, 2))
+        (1): ReLU()
+        (2): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(2, 2))
+        (3): ReLU()
+      )
+      (out): Sequential(
+        (0): Linear(in_features=19456, out_features=1024, bias=True)
+        (1): PositionalEncoding(
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+      )
+    )
+    (encoders): MultiSequential(
+      (0): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (1): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (2): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (3): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (4): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (5): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (6): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (7): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (8): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (9): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (10): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (11): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (12): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (13): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (14): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (15): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (16): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (17): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (18): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (19): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out):
Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (20): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (21): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (22): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (23): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() 
+ ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + ) + (decoder): TransformerDecoder( + (embed): Sequential( + (0): Embedding(50002, 1024) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (output_layer): Linear(in_features=1024, out_features=50002, bias=True) + (decoders): MultiSequential( + (0): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + 
(linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): 
Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): 
Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (9): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (10): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (11): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): 
Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (12): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (13): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, 
elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (14): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (15): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (16): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), 
eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (17): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (18): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (19): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): 
Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (20): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (21): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (22): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): 
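As a cross-check of the summary above, the layer shapes printed in the repr account for nearly all of the 888.51 M parameters; a back-of-envelope sketch (not ESPnet code), with the small remainder attributed to modules outside this excerpt such as the conv2d frontend:

```python
# Parameter accounting for the repr above (d_model=1024, d_ff=4096, vocab=50002).
d, ff, vocab = 1024, 4096, 50002

attn = 4 * (d * d + d)                  # linear_q/k/v/out, each with bias
ffn = (d * ff + ff) + (ff * d + d)      # w_1 + w_2
enc = attn + ffn + 2 * 2 * d            # + norm1/norm2 (weight and bias)
dec = 2 * attn + ffn + 3 * 2 * d        # self_attn + src_attn + norm1-3

total = 24 * enc + 24 * dec             # the e24/d24 stacks
total += vocab * d                      # decoder embedding
total += 2 * (d * vocab + vocab)        # output_layer and ctc_lo
print(f"{total / 1e6:.1f} M")           # -> 859.1 M of the 888.51 M reported
print(f"{888.51e6 * 4 / 1e9:.2f} GB")   # float32 is 4 B/param -> 3.55 GB, as logged
```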
+[gpub001:0/128] 2023-07-02 01:39:28,898 (abs_task:1205) INFO: Optimizer:
+AdamW (
+Parameter Group 0
+    amsgrad: False
+    betas: [0.9, 0.98]
+    capturable: False
+    eps: 1e-06
+    foreach: None
+    initial_lr: 0.00025
+    lr: 2.5e-08
+    maximize: False
+    weight_decay: 0.0
+)
+[gpub001:0/128] 2023-07-02 01:39:28,898 (abs_task:1206) INFO: Scheduler: WarmupLR(warmup_steps=10000)
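For reference, the "lr: 2.5e-08" in the AdamW parameter group above is simply the warmup schedule evaluated at the first step. Below is a minimal sketch of the Noam-style rule that WarmupLR applies (formula assumed from the scheduler's standard definition; the constants are the values printed in this log):

```python
# Minimal sketch of the WarmupLR rule (assumed standard Noam-style formula):
# lr(step) = initial_lr * warmup_steps**0.5 * min(step**-0.5, step * warmup_steps**-1.5)
def warmup_lr(step: int, initial_lr: float = 2.5e-4, warmup_steps: int = 10000) -> float:
    return initial_lr * warmup_steps**0.5 * min(step**-0.5, step * warmup_steps**-1.5)

print(warmup_lr(1))      # 2.5e-08 -> the "lr: 2.5e-08" printed above
print(warmup_lr(10000))  # 0.00025 -> reaches the configured peak at the end of warmup
print(warmup_lr(40000))  # 0.000125 -> then decays as step**-0.5
```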
+[gpub001:0/128] 2023-07-02 01:39:28,899 (abs_task:1215) INFO: Saving the configuration in exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/config.yaml
+[gpub001:0/128] 2023-07-02 01:39:29,583 (abs_task:1272) INFO: Loading pretrained params from /scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v2/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e18_d18_lr5e-4_warmup20k_raw_bpe50000/valid.acc.ave.pth
+[gpub001:0/128] 2023-07-02 01:39:42,237 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/128] 2023-07-02 01:39:42,389 (abs_task:1570) INFO: [valid] dataset:
+ESPnetDataset(
+  speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"}
+  text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"}
+  text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"}
+  text: {"path": "dump/raw/dev/text", "type": "text"}
+  preprocess: )
+[gpub001:0/128] 2023-07-02 01:39:42,389 (abs_task:1571) INFO: [valid] Batch sampler: UnsortedBatchSampler(N-batch=506, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape,
+[gpub001:0/128] 2023-07-02 01:39:42,389 (abs_task:1572) INFO: [valid] mini-batch sizes summary: N-batch=506, mean=256.1, min=256, max=257
+[gpub001:0/128] 2023-07-02 01:39:42,865 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/128] 2023-07-02 01:39:43,172 (abs_task:1570) INFO: [plot_att] dataset:
+ESPnetDataset(
+  speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"}
+  text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"}
+  text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"}
+  text: {"path": "dump/raw/dev/text", "type": "text"}
+  preprocess: )
+[gpub001:0/128] 2023-07-02 01:39:43,172 (abs_task:1571) INFO: [plot_att] Batch sampler: UnsortedBatchSampler(N-batch=129591, batch_size=1, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape,
+[gpub001:0/128] 2023-07-02 01:39:43,173 (abs_task:1572) INFO: [plot_att] mini-batch sizes summary: N-batch=3, mean=1.0, min=1, max=1
+gpub001:279948:279948 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.101<0>
+gpub001:279948:279948 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub001:279948:279948 [0] NCCL INFO cudaDriverVersion 12010
+NCCL version 2.14.3+cuda11.7
+[gpub001:0/128] 2023-07-02 01:39:49,879 (trainer:284) INFO: 1/100epoch started
+[gpub001:0/128] 2023-07-02 01:39:49,937 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub001:0/128] 2023-07-02 01:40:11,535 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/128] 2023-07-02 01:40:15,587 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.2", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.2", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.2", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.2", "type": "text"}
+  preprocess: )
+[gpub001:0/128] 2023-07-02 01:40:15,588 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.2,
+[gpub001:0/128] 2023-07-02 01:40:15,591 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257
+gpub012:1262644:1262644 [0] NCCL INFO cudaDriverVersion 12010
+gpub012:1262644:1262644 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.112<0>
+gpub012:1262644:1262644 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub012:1262644:1262706 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.112<0>
+gpub012:1262644:1262706 [0] NCCL INFO Using network IB
+gpub012:1262644:1262706 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub012:1262644:1262706 [0] NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->28
+gpub012:1262644:1262706 [0] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0
+gpub012:1262644:1262706 [0] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0
+gpub012:1262644:1262706 [0] NCCL INFO Channel 00/0 : 12[7000] -> 13[46000] via P2P/IPC
+gpub012:1262644:1262706 [0] NCCL INFO Channel 01/0 : 12[7000] -> 13[46000] via P2P/IPC
+gpub012:1262644:1262706 [0] NCCL INFO Connected all rings
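The per-rank NCCL blocks that follow are easier to track once you note the rank layout this log is consistent with: 128 ranks over 4-GPU nodes, with four consecutive global ranks per node. A tiny hypothetical helper (not part of ESPnet or NCCL):

```python
def rank_to_node_gpu(rank: int, gpus_per_node: int = 4) -> tuple[int, int]:
    """Map a global rank to (node_index, local_gpu), assuming block assignment."""
    return divmod(rank, gpus_per_node)

assert rank_to_node_gpu(12) == (3, 0)    # gpub012 logs ranks 12-15 above
assert rank_to_node_gpu(116) == (29, 0)  # gpub089 logs rank 116, cudaDev 0
```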
+gpub012:1262644:1262706 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [receive] via NET/IB/0 +gpub012:1262644:1262706 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [receive] via NET/IB/0 +gpub012:1262644:1262706 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [send] via NET/IB/0 +gpub012:1262644:1262706 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [receive] via NET/IB/0 +gpub012:1262644:1262706 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [send] via NET/IB/0 +gpub012:1262644:1262706 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [send] via NET/IB/0 +gpub012:1262644:1262706 [0] NCCL INFO Connected all trees +gpub012:1262644:1262706 [0] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub012:1262644:1262706 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub012:1262644:1262706 [0] NCCL INFO comm 0xb67ef980 rank 12 nranks 128 cudaDev 0 busId 7000 - Init COMPLETE +gpub067:1289107:1289107 [0] NCCL INFO cudaDriverVersion 12010 +gpub067:1289107:1289107 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.167<0> +gpub067:1289107:1289107 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub067:1289107:1289169 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.167<0> +gpub067:1289107:1289169 [0] NCCL INFO Using network IB +gpub067:1289107:1289169 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub067:1289107:1289169 [0] NCCL INFO Trees [0] 89/92/-1->88->80 [1] 89/-1/-1->88->85 +gpub067:1289107:1289169 [0] NCCL INFO Channel 00/0 : 87[c7000] -> 88[7000] [receive] via NET/IB/0 +gpub067:1289107:1289169 [0] NCCL INFO Channel 01/0 : 87[c7000] -> 88[7000] [receive] via NET/IB/0 +gpub067:1289107:1289169 [0] NCCL INFO Channel 00/0 : 88[7000] -> 89[46000] via P2P/IPC +gpub067:1289107:1289169 [0] NCCL INFO Channel 01/0 : 88[7000] -> 89[46000] via P2P/IPC +gpub067:1289107:1289169 [0] NCCL INFO Connected all rings +gpub014:1242932:1242932 [2] NCCL INFO cudaDriverVersion 12010 +gpub014:1242932:1242932 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.114<0> +gpub014:1242932:1242932 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub014:1242932:1242996 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.114<0> +gpub014:1242932:1242996 [2] NCCL INFO Using network IB +gpub014:1242932:1242996 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub014:1242932:1242996 [2] NCCL INFO Trees [0] 23/-1/-1->22->21 [1] 23/-1/-1->22->21 +gpub014:1242932:1242996 [2] NCCL INFO Channel 00/0 : 22[85000] -> 23[c7000] via P2P/IPC +gpub014:1242932:1242996 [2] NCCL INFO Channel 01/0 : 22[85000] -> 23[c7000] via P2P/IPC +gpub014:1242932:1242996 [2] NCCL INFO Connected all rings +gpub014:1242932:1242996 [2] NCCL INFO Channel 00/0 : 22[85000] -> 21[46000] via P2P/IPC +gpub014:1242932:1242996 [2] NCCL INFO Channel 01/0 : 22[85000] -> 21[46000] via P2P/IPC +gpub064:1376670:1376670 [1] NCCL INFO cudaDriverVersion 12010 +gpub064:1376670:1376670 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.164<0> +gpub064:1376670:1376670 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub064:1376670:1376736 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.164<0> +gpub064:1376670:1376736 [1] NCCL INFO Using network IB +gpub064:1376670:1376736 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub064:1376670:1376736 [1] NCCL INFO Trees [0] 78/-1/-1->77->76 [1] 78/84/-1->77->76 
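A short key for reading the init lines, based on how they pattern in this log (an interpretation, not NCCL documentation):

```python
# "Trees [0] 13/-1/-1->12->8" reads as: in tree 0, rank 12 has child 13
# (unused child slots are -1) and parent 8.
# "22[85000] -> 23[c7000] via P2P/IPC" is an intra-node hop between GPUs
# identified by PCI bus ID; "[send]/[receive] via NET/IB/0" marks an
# inter-node hop over InfiniBand.
# The "comm ... cudaDev N busId X" lines pin down the bus-ID-to-device map:
BUS_ID_TO_CUDA_DEV = {"7000": 0, "46000": 1, "85000": 2, "c7000": 3}
```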
+gpub064:1376670:1376736 [1] NCCL INFO Channel 00/0 : 77[46000] -> 78[85000] via P2P/IPC +gpub064:1376670:1376736 [1] NCCL INFO Channel 01/0 : 77[46000] -> 78[85000] via P2P/IPC +gpub064:1376670:1376736 [1] NCCL INFO Connected all rings +gpub064:1376670:1376736 [1] NCCL INFO Channel 01/0 : 77[46000] -> 84[7000] [send] via NET/IB/0 +gpub064:1376670:1376736 [1] NCCL INFO Channel 01/0 : 84[7000] -> 77[46000] [receive] via NET/IB/0 +gpub037:1358540:1358540 [2] NCCL INFO cudaDriverVersion 12010 +gpub037:1358540:1358540 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.137<0> +gpub037:1358540:1358540 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub037:1358540:1358598 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.137<0> +gpub037:1358540:1358598 [2] NCCL INFO Using network IB +gpub037:1358540:1358598 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub037:1358540:1358598 [2] NCCL INFO Trees [0] 47/-1/-1->46->45 [1] 47/-1/-1->46->45 +gpub037:1358540:1358598 [2] NCCL INFO Channel 00/0 : 46[85000] -> 47[c7000] via P2P/IPC +gpub037:1358540:1358598 [2] NCCL INFO Channel 01/0 : 46[85000] -> 47[c7000] via P2P/IPC +gpub037:1358540:1358598 [2] NCCL INFO Connected all rings +gpub037:1358540:1358598 [2] NCCL INFO Channel 00/0 : 46[85000] -> 45[46000] via P2P/IPC +gpub037:1358540:1358598 [2] NCCL INFO Channel 01/0 : 46[85000] -> 45[46000] via P2P/IPC +gpub012:1262647:1262647 [3] NCCL INFO cudaDriverVersion 12010 +gpub012:1262647:1262647 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.112<0> +gpub012:1262647:1262647 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub012:1262647:1262709 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.112<0> +gpub012:1262647:1262709 [3] NCCL INFO Using network IB +gpub012:1262647:1262709 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub012:1262647:1262709 [3] NCCL INFO Trees [0] -1/-1/-1->15->14 [1] -1/-1/-1->15->14 +gpub012:1262647:1262709 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0 +gpub012:1262647:1262709 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0 +gpub012:1262647:1262709 [3] NCCL INFO Connected all rings +gpub012:1262647:1262709 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub012:1262647:1262709 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub032:2709149:2709149 [1] NCCL INFO cudaDriverVersion 12010 +gpub032:2709149:2709149 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.132<0> +gpub032:2709149:2709149 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub032:2709149:2709215 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.132<0> +gpub032:2709149:2709215 [1] NCCL INFO Using network IB +gpub032:2709149:2709215 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub032:2709149:2709215 [1] NCCL INFO Trees [0] 38/-1/-1->37->36 [1] 38/40/-1->37->36 +gpub032:2709149:2709215 [1] NCCL INFO Channel 00/0 : 37[46000] -> 38[85000] via P2P/IPC +gpub032:2709149:2709215 [1] NCCL INFO Channel 01/0 : 37[46000] -> 38[85000] via P2P/IPC +gpub032:2709149:2709215 [1] NCCL INFO Connected all rings +gpub032:2709149:2709215 [1] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [send] via NET/IB/0 +gpub032:2709149:2709215 [1] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [receive] via NET/IB/0 +gpub089:1443526:1443526 [0] NCCL INFO cudaDriverVersion 12010 +gpub089:1443526:1443526 [0] NCCL 
INFO Bootstrap : Using eth1:172.28.23.189<0> +gpub089:1443526:1443526 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub089:1443526:1443886 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.189<0> +gpub089:1443526:1443886 [0] NCCL INFO Using network IB +gpub089:1443526:1443886 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub089:1443526:1443886 [0] NCCL INFO Trees [0] 117/-1/-1->116->121 [1] 117/112/-1->116->109 +gpub089:1443526:1443886 [0] NCCL INFO Channel 00/0 : 115[c7000] -> 116[7000] [receive] via NET/IB/0 +gpub089:1443526:1443886 [0] NCCL INFO Channel 01/0 : 115[c7000] -> 116[7000] [receive] via NET/IB/0 +gpub089:1443526:1443886 [0] NCCL INFO Channel 00/0 : 116[7000] -> 117[46000] via P2P/IPC +gpub089:1443526:1443886 [0] NCCL INFO Channel 01/0 : 116[7000] -> 117[46000] via P2P/IPC +gpub009:1313739:1313739 [0] NCCL INFO cudaDriverVersion 12010 +gpub009:1313739:1313739 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.109<0> +gpub009:1313739:1313739 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub009:1313739:1313796 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.109<0> +gpub009:1313739:1313796 [0] NCCL INFO Using network IB +gpub009:1313739:1313796 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub009:1313739:1313796 [0] NCCL INFO Trees [0] 5/-1/-1->4->9 [1] 5/0/-1->4->12 +gpub009:1313739:1313796 [0] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0 +gpub009:1313739:1313796 [0] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0 +gpub009:1313739:1313796 [0] NCCL INFO Channel 00/0 : 4[7000] -> 5[46000] via P2P/IPC +gpub009:1313739:1313796 [0] NCCL INFO Channel 01/0 : 4[7000] -> 5[46000] via P2P/IPC +gpub009:1313739:1313796 [0] NCCL INFO Connected all rings +gpub039:1773546:1773546 [1] NCCL INFO cudaDriverVersion 12010 +gpub039:1773546:1773546 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.139<0> +gpub039:1773546:1773546 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub039:1773546:1773609 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.139<0> +gpub039:1773546:1773609 [1] NCCL INFO Using network IB +gpub039:1773546:1773609 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub039:1773546:1773609 [1] NCCL INFO Trees [0] 54/-1/-1->53->52 [1] 54/56/-1->53->52 +gpub039:1773546:1773609 [1] NCCL INFO Channel 00/0 : 53[46000] -> 54[85000] via P2P/IPC +gpub039:1773546:1773609 [1] NCCL INFO Channel 01/0 : 53[46000] -> 54[85000] via P2P/IPC +gpub039:1773546:1773609 [1] NCCL INFO Connected all rings +gpub039:1773546:1773609 [1] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [send] via NET/IB/0 +gpub039:1773546:1773609 [1] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [receive] via NET/IB/0 +gpub059:1722911:1722911 [2] NCCL INFO cudaDriverVersion 12010 +gpub059:1722911:1722911 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.159<0> +gpub059:1722911:1722911 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub059:1722911:1722975 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.159<0> +gpub059:1722911:1722975 [2] NCCL INFO Using network IB +gpub059:1722911:1722975 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub059:1722911:1722975 [2] NCCL INFO Trees [0] 71/-1/-1->70->69 [1] 71/-1/-1->70->69 +gpub059:1722911:1722975 [2] NCCL INFO Channel 00/0 : 
+[... NCCL init messages in the same template for every one of the 128 ranks: each rank (cudaDriverVersion 12010) logs Bootstrap over its eth1 address, NET/IB : Using [0]mlx5_0:1/RoCE [RO], its GPU affinity mask, its Trees topology, Channel 00/0 and 01/0 routes via P2P/IPC within a node and via NET/IB/0 across nodes, "Connected all rings", "Connected all trees", threadThresholds 8/8/64 | 1024/8/64 | 512 | 512, "2 coll channels, 2 p2p channels, 2 p2p channels per peer", and finally "comm <addr> rank <N> nranks 128 cudaDev <D> busId <B> - Init COMPLETE" ...]
+gpub013:1454154:1454216 [2] NCCL INFO comm 0x8c14b260 rank 18 nranks 128 cudaDev 2 busId 85000 - Init COMPLETE
+gpub065:1317230:1317230 [3] NCCL INFO cudaDriverVersion 12010 +gpub065:1317230:1317230 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.165<0> +gpub065:1317230:1317230 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub065:1317230:1317295 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.165<0> +gpub065:1317230:1317295 [3] NCCL INFO Using network IB +gpub065:1317230:1317295 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub065:1317230:1317295 [3] NCCL INFO Trees [0] -1/-1/-1->83->82 [1] -1/-1/-1->83->82 +gpub065:1317230:1317295 [3] NCCL INFO Channel 00/0 : 83[c7000] -> 84[7000] [send] via NET/IB/0 +gpub065:1317230:1317295 [3] NCCL INFO Channel 01/0 : 83[c7000] -> 84[7000] [send] via NET/IB/0 +gpub065:1317230:1317295 [3] NCCL INFO Connected all rings +gpub065:1317230:1317295 [3] NCCL INFO Channel 00/0 : 83[c7000] -> 82[85000] via P2P/IPC +gpub065:1317230:1317295 [3] NCCL INFO Channel 01/0 : 83[c7000] -> 82[85000] via P2P/IPC +gpub065:1317230:1317295 [3] NCCL INFO Connected all trees +gpub065:1317230:1317295 [3] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub065:1317230:1317295 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub065:1317230:1317295 [3] NCCL INFO comm 0x8ebce5d0 rank 83 nranks 128 cudaDev 3 busId c7000 - Init COMPLETE +gpub065:1317229:1317229 [2] NCCL INFO cudaDriverVersion 12010 +gpub065:1317229:1317229 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.165<0> +gpub065:1317229:1317229 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub065:1317229:1317296 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.165<0> +gpub065:1317229:1317296 [2] NCCL INFO Using network IB +gpub065:1317229:1317296 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub065:1317229:1317296 [2] NCCL INFO Trees [0] 83/-1/-1->82->81 [1] 83/-1/-1->82->81 +gpub065:1317229:1317296 [2] NCCL INFO Channel 00/0 : 82[85000] -> 83[c7000] via P2P/IPC +gpub065:1317229:1317296 [2] NCCL INFO Channel 01/0 : 82[85000] -> 83[c7000] via P2P/IPC +gpub065:1317229:1317296 [2] NCCL INFO Connected all rings +gpub065:1317229:1317296 [2] NCCL INFO Channel 00/0 : 82[85000] -> 81[46000] via P2P/IPC +gpub065:1317229:1317296 [2] NCCL INFO Channel 01/0 : 82[85000] -> 81[46000] via P2P/IPC +gpub065:1317229:1317296 [2] NCCL INFO Connected all trees +gpub065:1317229:1317296 [2] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub065:1317229:1317296 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub065:1317229:1317296 [2] NCCL INFO comm 0x50c3e4a0 rank 82 nranks 128 cudaDev 2 busId 85000 - Init COMPLETE +gpub013:1454155:1454155 [3] NCCL INFO cudaDriverVersion 12010 +gpub013:1454155:1454155 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.113<0> +gpub013:1454155:1454155 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub013:1454155:1454218 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.113<0> +gpub013:1454155:1454218 [3] NCCL INFO Using network IB +gpub013:1454155:1454218 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub013:1454155:1454218 [3] NCCL INFO Trees [0] -1/-1/-1->19->18 [1] -1/-1/-1->19->18 +gpub013:1454155:1454218 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0 +gpub013:1454155:1454218 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0 +gpub013:1454155:1454218 [3] NCCL INFO Connected all rings 
+gpub013:1454155:1454218 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 18[85000] via P2P/IPC +gpub013:1454155:1454218 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 18[85000] via P2P/IPC +gpub013:1454155:1454218 [3] NCCL INFO Connected all trees +gpub013:1454155:1454218 [3] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub013:1454155:1454218 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub013:1454155:1454218 [3] NCCL INFO comm 0xaba06b70 rank 19 nranks 128 cudaDev 3 busId c7000 - Init COMPLETE +gpub083:326510:326510 [1] NCCL INFO cudaDriverVersion 12010 +gpub083:326510:326510 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.183<0> +gpub083:326510:326510 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub083:326510:326572 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.183<0> +gpub083:326510:326572 [1] NCCL INFO Using network IB +gpub083:326510:326572 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub083:326510:326572 [1] NCCL INFO Trees [0] 106/100/-1->105->104 [1] 106/-1/-1->105->104 +gpub083:326510:326572 [1] NCCL INFO Channel 00/0 : 105[46000] -> 106[85000] via P2P/IPC +gpub083:326510:326572 [1] NCCL INFO Channel 01/0 : 105[46000] -> 106[85000] via P2P/IPC +gpub083:326510:326572 [1] NCCL INFO Connected all rings +gpub083:326510:326572 [1] NCCL INFO Channel 00/0 : 100[7000] -> 105[46000] [receive] via NET/IB/0 +gpub083:326510:326572 [1] NCCL INFO Channel 00/0 : 105[46000] -> 100[7000] [send] via NET/IB/0 +gpub083:326510:326572 [1] NCCL INFO Channel 00/0 : 105[46000] -> 104[7000] via P2P/IPC +gpub083:326510:326572 [1] NCCL INFO Channel 01/0 : 105[46000] -> 104[7000] via P2P/IPC +gpub083:326510:326572 [1] NCCL INFO Connected all trees +gpub083:326510:326572 [1] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub083:326510:326572 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub083:326510:326572 [1] NCCL INFO comm 0xb489cca0 rank 105 nranks 128 cudaDev 1 busId 46000 - Init COMPLETE +gpub065:1317228:1317228 [1] NCCL INFO cudaDriverVersion 12010 +gpub065:1317228:1317228 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.165<0> +gpub065:1317228:1317228 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub065:1317228:1317297 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.165<0> +gpub065:1317228:1317297 [1] NCCL INFO Using network IB +gpub065:1317228:1317297 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub065:1317228:1317297 [1] NCCL INFO Trees [0] 82/72/-1->81->80 [1] 82/-1/-1->81->80 +gpub065:1317228:1317297 [1] NCCL INFO Channel 00/0 : 81[46000] -> 82[85000] via P2P/IPC +gpub065:1317228:1317297 [1] NCCL INFO Channel 01/0 : 81[46000] -> 82[85000] via P2P/IPC +gpub065:1317228:1317297 [1] NCCL INFO Connected all rings +gpub065:1317228:1317297 [1] NCCL INFO Channel 00/0 : 72[7000] -> 81[46000] [receive] via NET/IB/0 +gpub065:1317228:1317297 [1] NCCL INFO Channel 00/0 : 81[46000] -> 72[7000] [send] via NET/IB/0 +gpub065:1317228:1317297 [1] NCCL INFO Channel 00/0 : 81[46000] -> 80[7000] via P2P/IPC +gpub065:1317228:1317297 [1] NCCL INFO Channel 01/0 : 81[46000] -> 80[7000] via P2P/IPC +gpub065:1317228:1317297 [1] NCCL INFO Connected all trees +gpub065:1317228:1317297 [1] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub065:1317228:1317297 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub065:1317228:1317297 [1] NCCL INFO comm 
0x9cb3f50 rank 81 nranks 128 cudaDev 1 busId 46000 - Init COMPLETE +gpub013:1454153:1454153 [1] NCCL INFO cudaDriverVersion 12010 +gpub013:1454153:1454153 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.113<0> +gpub013:1454153:1454153 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub013:1454153:1454219 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.113<0> +gpub013:1454153:1454219 [1] NCCL INFO Using network IB +gpub013:1454153:1454219 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub013:1454153:1454219 [1] NCCL INFO Trees [0] 18/8/-1->17->16 [1] 18/-1/-1->17->16 +gpub013:1454153:1454219 [1] NCCL INFO Channel 00/0 : 17[46000] -> 18[85000] via P2P/IPC +gpub013:1454153:1454219 [1] NCCL INFO Channel 01/0 : 17[46000] -> 18[85000] via P2P/IPC +gpub013:1454153:1454219 [1] NCCL INFO Connected all rings +gpub013:1454153:1454219 [1] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [receive] via NET/IB/0 +gpub013:1454153:1454219 [1] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [send] via NET/IB/0 +gpub013:1454153:1454219 [1] NCCL INFO Channel 00/0 : 17[46000] -> 16[7000] via P2P/IPC +gpub013:1454153:1454219 [1] NCCL INFO Channel 01/0 : 17[46000] -> 16[7000] via P2P/IPC +gpub013:1454153:1454219 [1] NCCL INFO Connected all trees +gpub013:1454153:1454219 [1] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub013:1454153:1454219 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub013:1454153:1454219 [1] NCCL INFO comm 0x504a7bd0 rank 17 nranks 128 cudaDev 1 busId 46000 - Init COMPLETE +gpub075:323056:323056 [1] NCCL INFO cudaDriverVersion 12010 +gpub075:323056:323056 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.175<0> +gpub075:323056:323056 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub075:323056:323126 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.175<0> +gpub075:323056:323126 [1] NCCL INFO Using network IB +gpub075:323056:323126 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub075:323056:323126 [1] NCCL INFO Trees [0] 98/80/-1->97->96 [1] 98/-1/-1->97->96 +gpub075:323056:323126 [1] NCCL INFO Channel 00/0 : 97[46000] -> 98[85000] via P2P/IPC +gpub075:323056:323126 [1] NCCL INFO Channel 01/0 : 97[46000] -> 98[85000] via P2P/IPC +gpub075:323056:323126 [1] NCCL INFO Connected all rings +gpub075:323056:323126 [1] NCCL INFO Channel 00/0 : 80[7000] -> 97[46000] [receive] via NET/IB/0 +gpub075:323056:323126 [1] NCCL INFO Channel 00/0 : 97[46000] -> 80[7000] [send] via NET/IB/0 +gpub075:323056:323126 [1] NCCL INFO Channel 00/0 : 97[46000] -> 96[7000] via P2P/IPC +gpub075:323056:323126 [1] NCCL INFO Channel 01/0 : 97[46000] -> 96[7000] via P2P/IPC +gpub075:323056:323126 [1] NCCL INFO Connected all trees +gpub075:323056:323126 [1] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub075:323056:323126 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub075:323056:323126 [1] NCCL INFO comm 0xa49e4e0 rank 97 nranks 128 cudaDev 1 busId 46000 - Init COMPLETE +gpub065:1317227:1317227 [0] NCCL INFO cudaDriverVersion 12010 +gpub065:1317227:1317227 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.165<0> +gpub065:1317227:1317227 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub065:1317227:1317294 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.165<0> +gpub065:1317227:1317294 [0] NCCL INFO Using network IB 
+gpub065:1317227:1317294 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub065:1317227:1317294 [0] NCCL INFO Trees [0] 81/88/-1->80->97 [1] 81/-1/-1->80->84 +gpub065:1317227:1317294 [0] NCCL INFO Channel 00/0 : 79[c7000] -> 80[7000] [receive] via NET/IB/0 +gpub065:1317227:1317294 [0] NCCL INFO Channel 01/0 : 79[c7000] -> 80[7000] [receive] via NET/IB/0 +gpub065:1317227:1317294 [0] NCCL INFO Channel 00/0 : 80[7000] -> 81[46000] via P2P/IPC +gpub065:1317227:1317294 [0] NCCL INFO Channel 01/0 : 80[7000] -> 81[46000] via P2P/IPC +gpub065:1317227:1317294 [0] NCCL INFO Connected all rings +gpub065:1317227:1317294 [0] NCCL INFO Channel 01/0 : 80[7000] -> 84[7000] [send] via NET/IB/0 +gpub065:1317227:1317294 [0] NCCL INFO Channel 00/0 : 80[7000] -> 88[7000] [send] via NET/IB/0 +gpub065:1317227:1317294 [0] NCCL INFO Channel 00/0 : 80[7000] -> 97[46000] [send] via NET/IB/0 +gpub065:1317227:1317294 [0] NCCL INFO Channel 00/0 : 97[46000] -> 80[7000] [receive] via NET/IB/0 +gpub065:1317227:1317294 [0] NCCL INFO Channel 00/0 : 88[7000] -> 80[7000] [receive] via NET/IB/0 +gpub065:1317227:1317294 [0] NCCL INFO Channel 01/0 : 84[7000] -> 80[7000] [receive] via NET/IB/0 +gpub065:1317227:1317294 [0] NCCL INFO Connected all trees +gpub065:1317227:1317294 [0] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub065:1317227:1317294 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub065:1317227:1317294 [0] NCCL INFO comm 0xb6488400 rank 80 nranks 128 cudaDev 0 busId 7000 - Init COMPLETE +gpub068:1244814:1244814 [2] NCCL INFO cudaDriverVersion 12010 +gpub068:1244814:1244814 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.168<0> +gpub068:1244814:1244814 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub068:1244814:1244875 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.168<0> +gpub068:1244814:1244875 [2] NCCL INFO Using network IB +gpub068:1244814:1244875 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub068:1244814:1244875 [2] NCCL INFO Trees [0] 95/-1/-1->94->93 [1] 95/-1/-1->94->93 +gpub068:1244814:1244875 [2] NCCL INFO Channel 00/0 : 94[85000] -> 95[c7000] via P2P/IPC +gpub068:1244814:1244875 [2] NCCL INFO Channel 01/0 : 94[85000] -> 95[c7000] via P2P/IPC +gpub068:1244814:1244875 [2] NCCL INFO Connected all rings +gpub068:1244814:1244875 [2] NCCL INFO Channel 00/0 : 94[85000] -> 93[46000] via P2P/IPC +gpub068:1244814:1244875 [2] NCCL INFO Channel 01/0 : 94[85000] -> 93[46000] via P2P/IPC +gpub068:1244814:1244875 [2] NCCL INFO Connected all trees +gpub068:1244814:1244875 [2] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub068:1244814:1244875 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub068:1244814:1244875 [2] NCCL INFO comm 0xbeb7020 rank 94 nranks 128 cudaDev 2 busId 85000 - Init COMPLETE +gpub068:1244815:1244815 [3] NCCL INFO cudaDriverVersion 12010 +gpub068:1244815:1244815 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.168<0> +gpub068:1244815:1244815 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub068:1244815:1244878 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.168<0> +gpub068:1244815:1244878 [3] NCCL INFO Using network IB +gpub068:1244815:1244878 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub068:1244815:1244878 [3] NCCL INFO Trees [0] -1/-1/-1->95->94 [1] -1/-1/-1->95->94 +gpub068:1244815:1244878 [3] NCCL INFO Channel 00/0 : 95[c7000] -> 96[7000] [send] 
via NET/IB/0 +gpub068:1244815:1244878 [3] NCCL INFO Channel 01/0 : 95[c7000] -> 96[7000] [send] via NET/IB/0 +gpub068:1244815:1244878 [3] NCCL INFO Connected all rings +gpub068:1244815:1244878 [3] NCCL INFO Channel 00/0 : 95[c7000] -> 94[85000] via P2P/IPC +gpub068:1244815:1244878 [3] NCCL INFO Channel 01/0 : 95[c7000] -> 94[85000] via P2P/IPC +gpub068:1244815:1244878 [3] NCCL INFO Connected all trees +gpub068:1244815:1244878 [3] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub068:1244815:1244878 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub068:1244815:1244878 [3] NCCL INFO comm 0x8e4e7950 rank 95 nranks 128 cudaDev 3 busId c7000 - Init COMPLETE +gpub058:1406058:1406058 [2] NCCL INFO cudaDriverVersion 12010 +gpub058:1406058:1406058 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.158<0> +gpub058:1406058:1406058 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub058:1406058:1406115 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.158<0> +gpub058:1406058:1406115 [2] NCCL INFO Using network IB +gpub058:1406058:1406115 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub058:1406058:1406115 [2] NCCL INFO Trees [0] 67/-1/-1->66->65 [1] 67/-1/-1->66->65 +gpub058:1406058:1406115 [2] NCCL INFO Channel 00/0 : 66[85000] -> 67[c7000] via P2P/IPC +gpub058:1406058:1406115 [2] NCCL INFO Channel 01/0 : 66[85000] -> 67[c7000] via P2P/IPC +gpub058:1406058:1406115 [2] NCCL INFO Connected all rings +gpub058:1406058:1406115 [2] NCCL INFO Channel 00/0 : 66[85000] -> 65[46000] via P2P/IPC +gpub058:1406058:1406115 [2] NCCL INFO Channel 01/0 : 66[85000] -> 65[46000] via P2P/IPC +gpub058:1406058:1406115 [2] NCCL INFO Connected all trees +gpub058:1406058:1406115 [2] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub058:1406058:1406115 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub058:1406058:1406115 [2] NCCL INFO comm 0xb9766da0 rank 66 nranks 128 cudaDev 2 busId 85000 - Init COMPLETE +gpub001:279948:280013 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.101<0> +gpub001:279948:280013 [0] NCCL INFO Using network IB +gpub001:279948:280013 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub001:279948:280013 [0] NCCL INFO Channel 00/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpub001:279948:280013 [0] NCCL INFO Channel 01/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpub001:279948:280013 [0] NCCL INFO Trees [0] 1/64/-1->0->-1 [1] 1/-1/-1->0->4 +gpub001:279948:280013 [0] NCCL INFO Channel 00/0 : 127[c7000] -> 0[7000] [receive] via NET/IB/0 +gpub001:279948:280013 [0] NCCL INFO Channel 01/0 : 127[c7000] -> 0[7000] [receive] via NET/IB/0 +gpub001:279948:280013 [0] NCCL INFO Channel 00/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub001:279948:280013 [0] NCCL INFO Channel 01/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub001:279948:280013 [0] NCCL INFO Connected all rings +gpub001:279948:280013 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [send] via NET/IB/0 +gpub001:279948:280013 [0] NCCL INFO Channel 00/0 : 64[7000] -> 0[7000] [receive] via NET/IB/0 +gpub001:279948:280013 [0] NCCL INFO Channel 00/0 : 0[7000] -> 64[7000] [send] via NET/IB/0 +gpub001:279948:280013 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [receive] via NET/IB/0 +gpub001:279948:280013 [0] NCCL INFO Connected all trees +gpub001:279948:280013 [0] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub001:279948:280013 [0] NCCL INFO 2 coll 
channels, 2 p2p channels, 2 p2p channels per peer +gpub001:279948:280013 [0] NCCL INFO comm 0x4fb35be0 rank 0 nranks 128 cudaDev 0 busId 7000 - Init COMPLETE +gpub075:323057:323057 [2] NCCL INFO cudaDriverVersion 12010 +gpub075:323057:323057 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.175<0> +gpub075:323057:323057 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub075:323057:323128 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.175<0> +gpub075:323057:323128 [2] NCCL INFO Using network IB +gpub075:323057:323128 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub075:323057:323128 [2] NCCL INFO Trees [0] 99/-1/-1->98->97 [1] 99/-1/-1->98->97 +gpub075:323057:323128 [2] NCCL INFO Channel 00/0 : 98[85000] -> 99[c7000] via P2P/IPC +gpub075:323057:323128 [2] NCCL INFO Channel 01/0 : 98[85000] -> 99[c7000] via P2P/IPC +gpub075:323057:323128 [2] NCCL INFO Connected all rings +gpub075:323057:323128 [2] NCCL INFO Channel 00/0 : 98[85000] -> 97[46000] via P2P/IPC +gpub075:323057:323128 [2] NCCL INFO Channel 01/0 : 98[85000] -> 97[46000] via P2P/IPC +gpub075:323057:323128 [2] NCCL INFO Connected all trees +gpub075:323057:323128 [2] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub075:323057:323128 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub075:323057:323128 [2] NCCL INFO comm 0xb913a860 rank 98 nranks 128 cudaDev 2 busId 85000 - Init COMPLETE +gpub061:1342467:1342467 [1] NCCL INFO cudaDriverVersion 12010 +gpub061:1342467:1342467 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.161<0> +gpub061:1342467:1342467 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub061:1342467:1342541 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.161<0> +gpub061:1342467:1342541 [1] NCCL INFO Using network IB +gpub061:1342467:1342541 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub061:1342467:1342541 [1] NCCL INFO Trees [0] 74/68/-1->73->72 [1] 74/-1/-1->73->72 +gpub061:1342467:1342541 [1] NCCL INFO Channel 00/0 : 73[46000] -> 74[85000] via P2P/IPC +gpub061:1342467:1342541 [1] NCCL INFO Channel 01/0 : 73[46000] -> 74[85000] via P2P/IPC +gpub061:1342467:1342541 [1] NCCL INFO Connected all rings +gpub061:1342467:1342541 [1] NCCL INFO Channel 00/0 : 68[7000] -> 73[46000] [receive] via NET/IB/0 +gpub061:1342467:1342541 [1] NCCL INFO Channel 00/0 : 73[46000] -> 68[7000] [send] via NET/IB/0 +gpub061:1342467:1342541 [1] NCCL INFO Channel 00/0 : 73[46000] -> 72[7000] via P2P/IPC +gpub061:1342467:1342541 [1] NCCL INFO Channel 01/0 : 73[46000] -> 72[7000] via P2P/IPC +gpub061:1342467:1342541 [1] NCCL INFO Connected all trees +gpub061:1342467:1342541 [1] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub061:1342467:1342541 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub061:1342467:1342541 [1] NCCL INFO comm 0x50772b20 rank 73 nranks 128 cudaDev 1 busId 46000 - Init COMPLETE +gpub001:279950:279950 [2] NCCL INFO cudaDriverVersion 12010 +gpub001:279950:279950 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.101<0> +gpub001:279950:279950 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub001:279950:280016 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.101<0> +gpub001:279950:280016 [2] NCCL INFO Using network IB +gpub001:279950:280016 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub001:279950:280016 [2] NCCL INFO Trees 
[0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1 +gpub001:279950:280016 [2] NCCL INFO Channel 00/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub001:279950:280016 [2] NCCL INFO Channel 01/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub001:279950:280016 [2] NCCL INFO Connected all rings +gpub001:279950:280016 [2] NCCL INFO Channel 00/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub001:279950:280016 [2] NCCL INFO Channel 01/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub001:279950:280016 [2] NCCL INFO Connected all trees +gpub001:279950:280016 [2] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub001:279950:280016 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub001:279950:280016 [2] NCCL INFO comm 0x8c644f40 rank 2 nranks 128 cudaDev 2 busId 85000 - Init COMPLETE +gpub091:1688862:1688862 [0] NCCL INFO cudaDriverVersion 12010 +gpub091:1688862:1688862 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.191<0> +gpub091:1688862:1688862 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub091:1688862:1688930 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.191<0> +gpub091:1688862:1688930 [0] NCCL INFO Using network IB +gpub091:1688862:1688930 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub091:1688862:1688930 [0] NCCL INFO Trees [0] 125/-1/-1->124->120 [1] 125/60/-1->124->-1 +gpub091:1688862:1688930 [0] NCCL INFO Channel 00/0 : 123[c7000] -> 124[7000] [receive] via NET/IB/0 +gpub091:1688862:1688930 [0] NCCL INFO Channel 01/0 : 123[c7000] -> 124[7000] [receive] via NET/IB/0 +gpub091:1688862:1688930 [0] NCCL INFO Channel 00/0 : 124[7000] -> 125[46000] via P2P/IPC +gpub091:1688862:1688930 [0] NCCL INFO Channel 01/0 : 124[7000] -> 125[46000] via P2P/IPC +gpub091:1688862:1688930 [0] NCCL INFO Connected all rings +gpub091:1688862:1688930 [0] NCCL INFO Channel 00/0 : 120[7000] -> 124[7000] [receive] via NET/IB/0 +gpub091:1688862:1688930 [0] NCCL INFO Channel 01/0 : 60[7000] -> 124[7000] [receive] via NET/IB/0 +gpub091:1688862:1688930 [0] NCCL INFO Channel 01/0 : 124[7000] -> 60[7000] [send] via NET/IB/0 +gpub091:1688862:1688930 [0] NCCL INFO Channel 00/0 : 124[7000] -> 120[7000] [send] via NET/IB/0 +gpub091:1688862:1688930 [0] NCCL INFO Connected all trees +gpub091:1688862:1688930 [0] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub091:1688862:1688930 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub091:1688862:1688930 [0] NCCL INFO comm 0xb5c78140 rank 124 nranks 128 cudaDev 0 busId 7000 - Init COMPLETE +gpub061:1342466:1342466 [0] NCCL INFO cudaDriverVersion 12010 +gpub061:1342466:1342466 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.161<0> +gpub061:1342466:1342466 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub061:1342466:1342539 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.161<0> +gpub061:1342466:1342539 [0] NCCL INFO Using network IB +gpub061:1342466:1342539 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub061:1342466:1342539 [0] NCCL INFO Trees [0] 73/76/-1->72->81 [1] 73/-1/-1->72->69 +gpub061:1342466:1342539 [0] NCCL INFO Channel 00/0 : 71[c7000] -> 72[7000] [receive] via NET/IB/0 +gpub061:1342466:1342539 [0] NCCL INFO Channel 01/0 : 71[c7000] -> 72[7000] [receive] via NET/IB/0 +gpub061:1342466:1342539 [0] NCCL INFO Channel 00/0 : 72[7000] -> 73[46000] via P2P/IPC +gpub061:1342466:1342539 [0] NCCL INFO Channel 01/0 : 72[7000] -> 73[46000] via P2P/IPC +gpub061:1342466:1342539 [0] 
NCCL INFO Connected all rings +gpub061:1342466:1342539 [0] NCCL INFO Channel 01/0 : 69[46000] -> 72[7000] [receive] via NET/IB/0 +gpub061:1342466:1342539 [0] NCCL INFO Channel 00/0 : 72[7000] -> 76[7000] [send] via NET/IB/0 +gpub061:1342466:1342539 [0] NCCL INFO Channel 00/0 : 72[7000] -> 81[46000] [send] via NET/IB/0 +gpub061:1342466:1342539 [0] NCCL INFO Channel 00/0 : 81[46000] -> 72[7000] [receive] via NET/IB/0 +gpub061:1342466:1342539 [0] NCCL INFO Channel 00/0 : 76[7000] -> 72[7000] [receive] via NET/IB/0 +gpub061:1342466:1342539 [0] NCCL INFO Channel 01/0 : 72[7000] -> 69[46000] [send] via NET/IB/0 +gpub061:1342466:1342539 [0] NCCL INFO Connected all trees +gpub061:1342466:1342539 [0] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub061:1342466:1342539 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub061:1342466:1342539 [0] NCCL INFO comm 0x8c41510 rank 72 nranks 128 cudaDev 0 busId 7000 - Init COMPLETE +gpub090:1179805:1179805 [2] NCCL INFO cudaDriverVersion 12010 +gpub090:1179805:1179805 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.190<0> +gpub090:1179805:1179805 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub090:1179805:1179868 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.190<0> +gpub090:1179805:1179868 [2] NCCL INFO Using network IB +gpub090:1179805:1179868 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub090:1179805:1179868 [2] NCCL INFO Trees [0] 123/-1/-1->122->121 [1] 123/-1/-1->122->121 +gpub090:1179805:1179868 [2] NCCL INFO Channel 00/0 : 122[85000] -> 123[c7000] via P2P/IPC +gpub090:1179805:1179868 [2] NCCL INFO Channel 01/0 : 122[85000] -> 123[c7000] via P2P/IPC +gpub090:1179805:1179868 [2] NCCL INFO Connected all rings +gpub090:1179805:1179868 [2] NCCL INFO Channel 00/0 : 122[85000] -> 121[46000] via P2P/IPC +gpub090:1179805:1179868 [2] NCCL INFO Channel 01/0 : 122[85000] -> 121[46000] via P2P/IPC +gpub075:323058:323058 [3] NCCL INFO cudaDriverVersion 12010 +gpub075:323058:323058 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.175<0> +gpub075:323058:323058 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub075:323058:323127 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.175<0> +gpub075:323058:323127 [3] NCCL INFO Using network IB +gpub075:323058:323127 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub075:323058:323127 [3] NCCL INFO Trees [0] -1/-1/-1->99->98 [1] -1/-1/-1->99->98 +gpub075:323058:323127 [3] NCCL INFO Channel 00/0 : 99[c7000] -> 100[7000] [send] via NET/IB/0 +gpub075:323058:323127 [3] NCCL INFO Channel 01/0 : 99[c7000] -> 100[7000] [send] via NET/IB/0 +gpub075:323058:323127 [3] NCCL INFO Connected all rings +gpub075:323058:323127 [3] NCCL INFO Channel 00/0 : 99[c7000] -> 98[85000] via P2P/IPC +gpub075:323058:323127 [3] NCCL INFO Channel 01/0 : 99[c7000] -> 98[85000] via P2P/IPC +gpub090:1179805:1179868 [2] NCCL INFO Connected all trees +gpub090:1179805:1179868 [2] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub090:1179805:1179868 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub090:1179805:1179868 [2] NCCL INFO comm 0x50f1c1b0 rank 122 nranks 128 cudaDev 2 busId 85000 - Init COMPLETE +gpub075:323058:323127 [3] NCCL INFO Connected all trees +gpub075:323058:323127 [3] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub075:323058:323127 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer 
+gpub075:323058:323127 [3] NCCL INFO comm 0x50ae89c0 rank 99 nranks 128 cudaDev 3 busId c7000 - Init COMPLETE +gpub091:1688864:1688864 [2] NCCL INFO cudaDriverVersion 12010 +gpub091:1688864:1688864 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.191<0> +gpub091:1688864:1688864 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub091:1688864:1688932 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.191<0> +gpub091:1688864:1688932 [2] NCCL INFO Using network IB +gpub091:1688864:1688932 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub091:1688864:1688932 [2] NCCL INFO Trees [0] 127/-1/-1->126->125 [1] 127/-1/-1->126->125 +gpub091:1688864:1688932 [2] NCCL INFO Channel 00/0 : 126[85000] -> 127[c7000] via P2P/IPC +gpub091:1688864:1688932 [2] NCCL INFO Channel 01/0 : 126[85000] -> 127[c7000] via P2P/IPC +gpub091:1688864:1688932 [2] NCCL INFO Connected all rings +gpub091:1688864:1688932 [2] NCCL INFO Channel 00/0 : 126[85000] -> 125[46000] via P2P/IPC +gpub091:1688864:1688932 [2] NCCL INFO Channel 01/0 : 126[85000] -> 125[46000] via P2P/IPC +gpub091:1688864:1688932 [2] NCCL INFO Connected all trees +gpub091:1688864:1688932 [2] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub091:1688864:1688932 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub091:1688864:1688932 [2] NCCL INFO comm 0x51d52410 rank 126 nranks 128 cudaDev 2 busId 85000 - Init COMPLETE +gpub058:1406059:1406059 [3] NCCL INFO cudaDriverVersion 12010 +gpub058:1406059:1406059 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.158<0> +gpub058:1406059:1406059 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub058:1406059:1406116 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.158<0> +gpub058:1406059:1406116 [3] NCCL INFO Using network IB +gpub058:1406059:1406116 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub058:1406059:1406116 [3] NCCL INFO Trees [0] -1/-1/-1->67->66 [1] -1/-1/-1->67->66 +gpub058:1406059:1406116 [3] NCCL INFO Channel 00/0 : 67[c7000] -> 68[7000] [send] via NET/IB/0 +gpub058:1406059:1406116 [3] NCCL INFO Channel 01/0 : 67[c7000] -> 68[7000] [send] via NET/IB/0 +gpub058:1406059:1406116 [3] NCCL INFO Connected all rings +gpub058:1406059:1406116 [3] NCCL INFO Channel 00/0 : 67[c7000] -> 66[85000] via P2P/IPC +gpub058:1406059:1406116 [3] NCCL INFO Channel 01/0 : 67[c7000] -> 66[85000] via P2P/IPC +gpub058:1406059:1406116 [3] NCCL INFO Connected all trees +gpub058:1406059:1406116 [3] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub058:1406059:1406116 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub058:1406059:1406116 [3] NCCL INFO comm 0x5127edf0 rank 67 nranks 128 cudaDev 3 busId c7000 - Init COMPLETE +gpub061:1342468:1342468 [2] NCCL INFO cudaDriverVersion 12010 +gpub061:1342468:1342468 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.161<0> +gpub061:1342468:1342468 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub061:1342468:1342540 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.161<0> +gpub061:1342468:1342540 [2] NCCL INFO Using network IB +gpub061:1342468:1342540 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub061:1342468:1342540 [2] NCCL INFO Trees [0] 75/-1/-1->74->73 [1] 75/-1/-1->74->73 +gpub061:1342468:1342540 [2] NCCL INFO Channel 00/0 : 74[85000] -> 75[c7000] via P2P/IPC +gpub061:1342468:1342540 [2] NCCL INFO Channel 
01/0 : 74[85000] -> 75[c7000] via P2P/IPC +gpub061:1342468:1342540 [2] NCCL INFO Connected all rings +gpub061:1342468:1342540 [2] NCCL INFO Channel 00/0 : 74[85000] -> 73[46000] via P2P/IPC +gpub061:1342468:1342540 [2] NCCL INFO Channel 01/0 : 74[85000] -> 73[46000] via P2P/IPC +gpub061:1342468:1342540 [2] NCCL INFO Connected all trees +gpub061:1342468:1342540 [2] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub061:1342468:1342540 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub061:1342468:1342540 [2] NCCL INFO comm 0xa2ca84b0 rank 74 nranks 128 cudaDev 2 busId 85000 - Init COMPLETE +gpub061:1342469:1342469 [3] NCCL INFO cudaDriverVersion 12010 +gpub061:1342469:1342469 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.161<0> +gpub061:1342469:1342469 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub061:1342469:1342542 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.161<0> +gpub061:1342469:1342542 [3] NCCL INFO Using network IB +gpub061:1342469:1342542 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub061:1342469:1342542 [3] NCCL INFO Trees [0] -1/-1/-1->75->74 [1] -1/-1/-1->75->74 +gpub061:1342469:1342542 [3] NCCL INFO Channel 00/0 : 75[c7000] -> 76[7000] [send] via NET/IB/0 +gpub061:1342469:1342542 [3] NCCL INFO Channel 01/0 : 75[c7000] -> 76[7000] [send] via NET/IB/0 +gpub061:1342469:1342542 [3] NCCL INFO Connected all rings +gpub061:1342469:1342542 [3] NCCL INFO Channel 00/0 : 75[c7000] -> 74[85000] via P2P/IPC +gpub061:1342469:1342542 [3] NCCL INFO Channel 01/0 : 75[c7000] -> 74[85000] via P2P/IPC +gpub075:323055:323055 [0] NCCL INFO cudaDriverVersion 12010 +gpub075:323055:323055 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.175<0> +gpub075:323055:323055 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub075:323055:323125 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.175<0> +gpub075:323055:323125 [0] NCCL INFO Using network IB +gpub075:323055:323125 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub075:323055:323125 [0] NCCL INFO Trees [0] 97/112/-1->96->64 [1] 97/-1/-1->96->100 +gpub075:323055:323125 [0] NCCL INFO Channel 00/0 : 95[c7000] -> 96[7000] [receive] via NET/IB/0 +gpub075:323055:323125 [0] NCCL INFO Channel 01/0 : 95[c7000] -> 96[7000] [receive] via NET/IB/0 +gpub075:323055:323125 [0] NCCL INFO Channel 00/0 : 96[7000] -> 97[46000] via P2P/IPC +gpub075:323055:323125 [0] NCCL INFO Channel 01/0 : 96[7000] -> 97[46000] via P2P/IPC +gpub075:323055:323125 [0] NCCL INFO Connected all rings +gpub061:1342469:1342542 [3] NCCL INFO Connected all trees +gpub061:1342469:1342542 [3] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub061:1342469:1342542 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub061:1342469:1342542 [3] NCCL INFO comm 0xb6128330 rank 75 nranks 128 cudaDev 3 busId c7000 - Init COMPLETE +gpub075:323055:323125 [0] NCCL INFO Channel 01/0 : 96[7000] -> 100[7000] [send] via NET/IB/0 +gpub075:323055:323125 [0] NCCL INFO Channel 00/0 : 96[7000] -> 112[7000] [send] via NET/IB/0 +gpub075:323055:323125 [0] NCCL INFO Channel 00/0 : 64[7000] -> 96[7000] [receive] via NET/IB/0 +gpub075:323055:323125 [0] NCCL INFO Channel 00/0 : 96[7000] -> 64[7000] [send] via NET/IB/0 +gpub075:323055:323125 [0] NCCL INFO Channel 00/0 : 112[7000] -> 96[7000] [receive] via NET/IB/0 +gpub075:323055:323125 [0] NCCL INFO Channel 01/0 : 100[7000] -> 96[7000] [receive] 
via NET/IB/0 +gpub075:323055:323125 [0] NCCL INFO Connected all trees +gpub075:323055:323125 [0] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub075:323055:323125 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub075:323055:323125 [0] NCCL INFO comm 0x4fef3c60 rank 96 nranks 128 cudaDev 0 busId 7000 - Init COMPLETE +gpub090:1179803:1179803 [0] NCCL INFO cudaDriverVersion 12010 +gpub090:1179803:1179803 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.190<0> +gpub090:1179803:1179803 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub090:1179803:1179871 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.190<0> +gpub090:1179803:1179871 [0] NCCL INFO Using network IB +gpub090:1179803:1179871 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub090:1179803:1179871 [0] NCCL INFO Trees [0] 121/124/-1->120->112 [1] 121/-1/-1->120->117 +gpub090:1179803:1179871 [0] NCCL INFO Channel 00/0 : 119[c7000] -> 120[7000] [receive] via NET/IB/0 +gpub090:1179803:1179871 [0] NCCL INFO Channel 01/0 : 119[c7000] -> 120[7000] [receive] via NET/IB/0 +gpub090:1179803:1179871 [0] NCCL INFO Channel 00/0 : 120[7000] -> 121[46000] via P2P/IPC +gpub090:1179803:1179871 [0] NCCL INFO Channel 01/0 : 120[7000] -> 121[46000] via P2P/IPC +gpub090:1179803:1179871 [0] NCCL INFO Connected all rings +gpub090:1179803:1179871 [0] NCCL INFO Channel 01/0 : 117[46000] -> 120[7000] [receive] via NET/IB/0 +gpub090:1179803:1179871 [0] NCCL INFO Channel 00/0 : 120[7000] -> 124[7000] [send] via NET/IB/0 +gpub090:1179803:1179871 [0] NCCL INFO Channel 00/0 : 112[7000] -> 120[7000] [receive] via NET/IB/0 +gpub090:1179803:1179871 [0] NCCL INFO Channel 00/0 : 120[7000] -> 112[7000] [send] via NET/IB/0 +gpub090:1179803:1179871 [0] NCCL INFO Channel 00/0 : 124[7000] -> 120[7000] [receive] via NET/IB/0 +gpub090:1179803:1179871 [0] NCCL INFO Channel 01/0 : 120[7000] -> 117[46000] [send] via NET/IB/0 +gpub090:1179803:1179871 [0] NCCL INFO Connected all trees +gpub090:1179803:1179871 [0] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub090:1179803:1179871 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub090:1179803:1179871 [0] NCCL INFO comm 0x521aa9c0 rank 120 nranks 128 cudaDev 0 busId 7000 - Init COMPLETE +gpub080:3990802:3990802 [3] NCCL INFO cudaDriverVersion 12010 +gpub080:3990802:3990802 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.180<0> +gpub080:3990802:3990802 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub080:3990802:3990869 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.180<0> +gpub080:3990802:3990869 [3] NCCL INFO Using network IB +gpub080:3990802:3990869 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub080:3990802:3990869 [3] NCCL INFO Trees [0] -1/-1/-1->103->102 [1] -1/-1/-1->103->102 +gpub080:3990802:3990869 [3] NCCL INFO Channel 00/0 : 103[c7000] -> 104[7000] [send] via NET/IB/0 +gpub080:3990802:3990869 [3] NCCL INFO Channel 01/0 : 103[c7000] -> 104[7000] [send] via NET/IB/0 +gpub080:3990802:3990869 [3] NCCL INFO Connected all rings +gpub080:3990802:3990869 [3] NCCL INFO Channel 00/0 : 103[c7000] -> 102[85000] via P2P/IPC +gpub080:3990802:3990869 [3] NCCL INFO Channel 01/0 : 103[c7000] -> 102[85000] via P2P/IPC +gpub080:3990802:3990869 [3] NCCL INFO Connected all trees +gpub080:3990802:3990869 [3] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub080:3990802:3990869 [3] NCCL INFO 2 coll 
channels, 2 p2p channels, 2 p2p channels per peer +gpub080:3990802:3990869 [3] NCCL INFO comm 0xb6ed4500 rank 103 nranks 128 cudaDev 3 busId c7000 - Init COMPLETE +gpub090:1179804:1179804 [1] NCCL INFO cudaDriverVersion 12010 +gpub090:1179804:1179804 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.190<0> +gpub090:1179804:1179804 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub090:1179804:1179870 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.190<0> +gpub090:1179804:1179870 [1] NCCL INFO Using network IB +gpub090:1179804:1179870 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub090:1179804:1179870 [1] NCCL INFO Trees [0] 122/116/-1->121->120 [1] 122/-1/-1->121->120 +gpub090:1179804:1179870 [1] NCCL INFO Channel 00/0 : 121[46000] -> 122[85000] via P2P/IPC +gpub090:1179804:1179870 [1] NCCL INFO Channel 01/0 : 121[46000] -> 122[85000] via P2P/IPC +gpub090:1179804:1179870 [1] NCCL INFO Connected all rings +gpub090:1179804:1179870 [1] NCCL INFO Channel 00/0 : 116[7000] -> 121[46000] [receive] via NET/IB/0 +gpub090:1179804:1179870 [1] NCCL INFO Channel 00/0 : 121[46000] -> 116[7000] [send] via NET/IB/0 +gpub090:1179804:1179870 [1] NCCL INFO Channel 00/0 : 121[46000] -> 120[7000] via P2P/IPC +gpub090:1179804:1179870 [1] NCCL INFO Channel 01/0 : 121[46000] -> 120[7000] via P2P/IPC +gpub090:1179804:1179870 [1] NCCL INFO Connected all trees +gpub090:1179804:1179870 [1] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub090:1179804:1179870 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub090:1179804:1179870 [1] NCCL INFO comm 0x50af0d60 rank 121 nranks 128 cudaDev 1 busId 46000 - Init COMPLETE +gpub091:1688863:1688863 [1] NCCL INFO cudaDriverVersion 12010 +gpub091:1688863:1688863 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.191<0> +gpub091:1688863:1688863 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub091:1688863:1688931 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.191<0> +gpub091:1688863:1688931 [1] NCCL INFO Using network IB +gpub091:1688863:1688931 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub091:1688863:1688931 [1] NCCL INFO Trees [0] 126/-1/-1->125->124 [1] 126/-1/-1->125->124 +gpub091:1688863:1688931 [1] NCCL INFO Channel 00/0 : 125[46000] -> 126[85000] via P2P/IPC +gpub091:1688863:1688931 [1] NCCL INFO Channel 01/0 : 125[46000] -> 126[85000] via P2P/IPC +gpub091:1688863:1688931 [1] NCCL INFO Connected all rings +gpub091:1688863:1688931 [1] NCCL INFO Channel 00/0 : 125[46000] -> 124[7000] via P2P/IPC +gpub091:1688863:1688931 [1] NCCL INFO Channel 01/0 : 125[46000] -> 124[7000] via P2P/IPC +gpub091:1688863:1688931 [1] NCCL INFO Connected all trees +gpub091:1688863:1688931 [1] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub091:1688863:1688931 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub091:1688863:1688931 [1] NCCL INFO comm 0x50bc8450 rank 125 nranks 128 cudaDev 1 busId 46000 - Init COMPLETE +gpub090:1179806:1179806 [3] NCCL INFO cudaDriverVersion 12010 +gpub090:1179806:1179806 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.190<0> +gpub090:1179806:1179806 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub090:1179806:1179869 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.190<0> +gpub090:1179806:1179869 [3] NCCL INFO Using network IB +gpub090:1179806:1179869 [3] NCCL 
INFO Setting affinity for GPU 3 to ffff +gpub090:1179806:1179869 [3] NCCL INFO Trees [0] -1/-1/-1->123->122 [1] -1/-1/-1->123->122 +gpub090:1179806:1179869 [3] NCCL INFO Channel 00/0 : 123[c7000] -> 124[7000] [send] via NET/IB/0 +gpub090:1179806:1179869 [3] NCCL INFO Channel 01/0 : 123[c7000] -> 124[7000] [send] via NET/IB/0 +gpub090:1179806:1179869 [3] NCCL INFO Connected all rings +gpub090:1179806:1179869 [3] NCCL INFO Channel 00/0 : 123[c7000] -> 122[85000] via P2P/IPC +gpub090:1179806:1179869 [3] NCCL INFO Channel 01/0 : 123[c7000] -> 122[85000] via P2P/IPC +gpub090:1179806:1179869 [3] NCCL INFO Connected all trees +gpub090:1179806:1179869 [3] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub090:1179806:1179869 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub090:1179806:1179869 [3] NCCL INFO comm 0x5049f1b0 rank 123 nranks 128 cudaDev 3 busId c7000 - Init COMPLETE +gpub058:1406057:1406057 [1] NCCL INFO cudaDriverVersion 12010 +gpub058:1406057:1406057 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.158<0> +gpub058:1406057:1406057 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub058:1406057:1406117 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.158<0> +gpub058:1406057:1406117 [1] NCCL INFO Using network IB +gpub058:1406057:1406117 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub058:1406057:1406117 [1] NCCL INFO Trees [0] 66/32/-1->65->64 [1] 66/-1/-1->65->64 +gpub058:1406057:1406117 [1] NCCL INFO Channel 00/0 : 65[46000] -> 66[85000] via P2P/IPC +gpub058:1406057:1406117 [1] NCCL INFO Channel 01/0 : 65[46000] -> 66[85000] via P2P/IPC +gpub058:1406057:1406117 [1] NCCL INFO Connected all rings +gpub058:1406057:1406117 [1] NCCL INFO Channel 00/0 : 32[7000] -> 65[46000] [receive] via NET/IB/0 +gpub058:1406057:1406117 [1] NCCL INFO Channel 00/0 : 65[46000] -> 32[7000] [send] via NET/IB/0 +gpub058:1406057:1406117 [1] NCCL INFO Channel 00/0 : 65[46000] -> 64[7000] via P2P/IPC +gpub058:1406057:1406117 [1] NCCL INFO Channel 01/0 : 65[46000] -> 64[7000] via P2P/IPC +gpub058:1406057:1406117 [1] NCCL INFO Connected all trees +gpub058:1406057:1406117 [1] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub058:1406057:1406117 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub058:1406057:1406117 [1] NCCL INFO comm 0x503d1e40 rank 65 nranks 128 cudaDev 1 busId 46000 - Init COMPLETE +gpub091:1688865:1688865 [3] NCCL INFO cudaDriverVersion 12010 +gpub091:1688865:1688865 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.191<0> +gpub091:1688865:1688865 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub091:1688865:1688929 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.191<0> +gpub091:1688865:1688929 [3] NCCL INFO Using network IB +gpub091:1688865:1688929 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub091:1688865:1688929 [3] NCCL INFO Trees [0] -1/-1/-1->127->126 [1] -1/-1/-1->127->126 +gpub091:1688865:1688929 [3] NCCL INFO Channel 00/0 : 127[c7000] -> 0[7000] [send] via NET/IB/0 +gpub091:1688865:1688929 [3] NCCL INFO Channel 01/0 : 127[c7000] -> 0[7000] [send] via NET/IB/0 +gpub091:1688865:1688929 [3] NCCL INFO Connected all rings +gpub091:1688865:1688929 [3] NCCL INFO Channel 00/0 : 127[c7000] -> 126[85000] via P2P/IPC +gpub091:1688865:1688929 [3] NCCL INFO Channel 01/0 : 127[c7000] -> 126[85000] via P2P/IPC +gpub091:1688865:1688929 [3] NCCL INFO Connected all trees 
+gpub091:1688865:1688929 [3] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub091:1688865:1688929 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub091:1688865:1688929 [3] NCCL INFO comm 0x8ce193d0 rank 127 nranks 128 cudaDev 3 busId c7000 - Init COMPLETE
+gpub001:279949:279949 [1] NCCL INFO cudaDriverVersion 12010
+gpub001:279949:279949 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.101<0>
+gpub001:279949:279949 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub001:279949:280015 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.101<0>
+gpub001:279949:280015 [1] NCCL INFO Using network IB
+gpub001:279949:280015 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub001:279949:280015 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0
+gpub001:279949:280015 [1] NCCL INFO Channel 00/0 : 1[46000] -> 2[85000] via P2P/IPC
+gpub001:279949:280015 [1] NCCL INFO Channel 01/0 : 1[46000] -> 2[85000] via P2P/IPC
+gpub001:279949:280015 [1] NCCL INFO Connected all rings
+gpub001:279949:280015 [1] NCCL INFO Channel 00/0 : 1[46000] -> 0[7000] via P2P/IPC
+gpub001:279949:280015 [1] NCCL INFO Channel 01/0 : 1[46000] -> 0[7000] via P2P/IPC
+gpub001:279949:280015 [1] NCCL INFO Connected all trees
+gpub001:279949:280015 [1] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub001:279949:280015 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub001:279949:280015 [1] NCCL INFO comm 0x8b9ccb80 rank 1 nranks 128 cudaDev 1 busId 46000 - Init COMPLETE
+gpub035:2421441:2421441 [0] NCCL INFO cudaDriverVersion 12010
+gpub035:2421441:2421441 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.135<0>
+gpub035:2421441:2421441 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub035:2421441:2421510 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.135<0>
+gpub035:2421441:2421510 [0] NCCL INFO Using network IB
+gpub035:2421441:2421510 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub035:2421441:2421510 [0] NCCL INFO Trees [0] 41/44/-1->40->49 [1] 41/-1/-1->40->37
+gpub035:2421441:2421510 [0] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0
+gpub035:2421441:2421510 [0] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0
+gpub035:2421441:2421510 [0] NCCL INFO Channel 00/0 : 40[7000] -> 41[46000] via P2P/IPC
+gpub035:2421441:2421510 [0] NCCL INFO Channel 01/0 : 40[7000] -> 41[46000] via P2P/IPC
+gpub035:2421441:2421510 [0] NCCL INFO Connected all rings
+gpub035:2421441:2421510 [0] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [receive] via NET/IB/0
+gpub035:2421441:2421510 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [send] via NET/IB/0
+gpub035:2421441:2421510 [0] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [send] via NET/IB/0
+gpub035:2421441:2421510 [0] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [receive] via NET/IB/0
+gpub035:2421441:2421510 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [receive] via NET/IB/0
+gpub035:2421441:2421510 [0] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [send] via NET/IB/0
+gpub035:2421441:2421510 [0] NCCL INFO Connected all trees
+gpub035:2421441:2421510 [0] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub035:2421441:2421510 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub035:2421441:2421510 [0] NCCL INFO comm 0x8ca3dc20 rank 40 nranks 128 cudaDev 0 busId 7000 - Init COMPLETE
+gpub001:279951:279951 [3] NCCL INFO cudaDriverVersion 12010
+gpub001:279951:279951 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.101<0>
+gpub001:279951:279951 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub001:279951:280014 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.101<0>
+gpub001:279951:280014 [3] NCCL INFO Using network IB
+gpub001:279951:280014 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub001:279951:280014 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2
+gpub001:279951:280014 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0
+gpub001:279951:280014 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0
+gpub001:279951:280014 [3] NCCL INFO Connected all rings
+gpub001:279951:280014 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 2[85000] via P2P/IPC
+gpub001:279951:280014 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 2[85000] via P2P/IPC
+gpub001:279951:280014 [3] NCCL INFO Connected all trees
+gpub001:279951:280014 [3] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub001:279951:280014 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub001:279951:280014 [3] NCCL INFO comm 0x8d9eaa40 rank 3 nranks 128 cudaDev 3 busId c7000 - Init COMPLETE
+gpub085:1471917:1471917 [1] NCCL INFO cudaDriverVersion 12010
+gpub085:1471917:1471917 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.185<0>
+gpub085:1471917:1471917 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub085:1471917:1471987 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.185<0>
+gpub085:1471917:1471987 [1] NCCL INFO Using network IB
+gpub085:1471917:1471987 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub085:1471917:1471987 [1] NCCL INFO Trees [0] 110/-1/-1->109->108 [1] 110/116/-1->109->108
+gpub085:1471917:1471987 [1] NCCL INFO Channel 00/0 : 109[46000] -> 110[85000] via P2P/IPC
+gpub085:1471917:1471987 [1] NCCL INFO Channel 01/0 : 109[46000] -> 110[85000] via P2P/IPC
+gpub085:1471917:1471987 [1] NCCL INFO Connected all rings
+gpub085:1471917:1471987 [1] NCCL INFO Channel 01/0 : 109[46000] -> 116[7000] [send] via NET/IB/0
+gpub085:1471917:1471987 [1] NCCL INFO Channel 01/0 : 116[7000] -> 109[46000] [receive] via NET/IB/0
+gpub085:1471917:1471987 [1] NCCL INFO Channel 00/0 : 109[46000] -> 108[7000] via P2P/IPC
+gpub085:1471917:1471987 [1] NCCL INFO Channel 01/0 : 109[46000] -> 108[7000] via P2P/IPC
+gpub085:1471917:1471987 [1] NCCL INFO Connected all trees
+gpub085:1471917:1471987 [1] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub085:1471917:1471987 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub085:1471917:1471987 [1] NCCL INFO comm 0x9704c80 rank 109 nranks 128 cudaDev 1 busId 46000 - Init COMPLETE
+gpub085:1471918:1471918 [2] NCCL INFO cudaDriverVersion 12010
+gpub085:1471918:1471918 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.185<0>
+gpub085:1471918:1471918 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub085:1471918:1471989 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.185<0>
+gpub085:1471918:1471989 [2] NCCL INFO Using network IB
+gpub085:1471918:1471989 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub085:1471918:1471989 [2] NCCL INFO Trees [0] 111/-1/-1->110->109 [1] 111/-1/-1->110->109
+gpub085:1471918:1471989 [2] NCCL INFO Channel 00/0 : 110[85000] -> 111[c7000] via P2P/IPC
+gpub085:1471918:1471989 [2] NCCL INFO Channel 01/0 : 110[85000] -> 111[c7000] via P2P/IPC
+gpub085:1471918:1471989 [2] NCCL INFO Connected all rings
+gpub085:1471918:1471989 [2] NCCL INFO Channel 00/0 : 110[85000] -> 109[46000] via P2P/IPC
+gpub085:1471918:1471989 [2] NCCL INFO Channel 01/0 : 110[85000] -> 109[46000] via P2P/IPC
+gpub085:1471918:1471989 [2] NCCL INFO Connected all trees
+gpub085:1471918:1471989 [2] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub085:1471918:1471989 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub085:1471918:1471989 [2] NCCL INFO comm 0x4fbd7c40 rank 110 nranks 128 cudaDev 2 busId 85000 - Init COMPLETE
+gpub085:1471916:1471916 [0] NCCL INFO cudaDriverVersion 12010
+gpub085:1471916:1471916 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.185<0>
+gpub085:1471916:1471916 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub085:1471916:1471986 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.185<0>
+gpub085:1471916:1471986 [0] NCCL INFO Using network IB
+gpub085:1471916:1471986 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub085:1471916:1471986 [0] NCCL INFO Trees [0] 109/-1/-1->108->104 [1] 109/100/-1->108->93
+gpub085:1471916:1471986 [0] NCCL INFO Channel 00/0 : 107[c7000] -> 108[7000] [receive] via NET/IB/0
+gpub085:1471916:1471986 [0] NCCL INFO Channel 01/0 : 107[c7000] -> 108[7000] [receive] via NET/IB/0
+gpub085:1471916:1471986 [0] NCCL INFO Channel 00/0 : 108[7000] -> 109[46000] via P2P/IPC
+gpub085:1471916:1471986 [0] NCCL INFO Channel 01/0 : 108[7000] -> 109[46000] via P2P/IPC
+gpub085:1471916:1471986 [0] NCCL INFO Connected all rings
+gpub085:1471916:1471986 [0] NCCL INFO Channel 00/0 : 104[7000] -> 108[7000] [receive] via NET/IB/0
+gpub085:1471916:1471986 [0] NCCL INFO Channel 01/0 : 100[7000] -> 108[7000] [receive] via NET/IB/0
+gpub085:1471916:1471986 [0] NCCL INFO Channel 01/0 : 93[46000] -> 108[7000] [receive] via NET/IB/0
+gpub085:1471916:1471986 [0] NCCL INFO Channel 01/0 : 108[7000] -> 93[46000] [send] via NET/IB/0
+gpub085:1471916:1471986 [0] NCCL INFO Channel 01/0 : 108[7000] -> 100[7000] [send] via NET/IB/0
+gpub085:1471916:1471986 [0] NCCL INFO Channel 00/0 : 108[7000] -> 104[7000] [send] via NET/IB/0
+gpub085:1471916:1471986 [0] NCCL INFO Connected all trees
+gpub085:1471916:1471986 [0] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub085:1471916:1471986 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub085:1471916:1471986 [0] NCCL INFO comm 0xb90085a0 rank 108 nranks 128 cudaDev 0 busId 7000 - Init COMPLETE
+gpub068:1244812:1244812 [0] NCCL INFO cudaDriverVersion 12010
+gpub068:1244812:1244812 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.168<0>
+gpub068:1244812:1244812 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub068:1244812:1244877 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.168<0>
+gpub068:1244812:1244877 [0] NCCL INFO Using network IB
+gpub068:1244812:1244877 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub068:1244812:1244877 [0] NCCL INFO Trees [0] 93/-1/-1->92->88 [1] 93/76/-1->92->61
+gpub068:1244812:1244877 [0] NCCL INFO Channel 00/0 : 91[c7000] -> 92[7000] [receive] via NET/IB/0
+gpub068:1244812:1244877 [0] NCCL INFO Channel 01/0 : 91[c7000] -> 92[7000] [receive] via NET/IB/0
+gpub068:1244812:1244877 [0] NCCL INFO Channel 00/0 : 92[7000] -> 93[46000] via P2P/IPC
+gpub068:1244812:1244877 [0] NCCL INFO Channel 01/0 : 92[7000] -> 93[46000] via P2P/IPC
+gpub068:1244812:1244877 [0] NCCL INFO Connected all rings
+gpub068:1244812:1244877 [0] NCCL INFO Channel 00/0 : 88[7000] -> 92[7000] [receive] via NET/IB/0
+gpub068:1244812:1244877 [0] NCCL INFO Channel 01/0 : 76[7000] -> 92[7000] [receive] via NET/IB/0
+gpub068:1244812:1244877 [0] NCCL INFO Channel 01/0 : 61[46000] -> 92[7000] [receive] via NET/IB/0
+gpub068:1244812:1244877 [0] NCCL INFO Channel 01/0 : 92[7000] -> 61[46000] [send] via NET/IB/0
+gpub068:1244812:1244877 [0] NCCL INFO Channel 01/0 : 92[7000] -> 76[7000] [send] via NET/IB/0
+gpub068:1244812:1244877 [0] NCCL INFO Channel 00/0 : 92[7000] -> 88[7000] [send] via NET/IB/0
+gpub068:1244812:1244877 [0] NCCL INFO Connected all trees
+gpub068:1244812:1244877 [0] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub068:1244812:1244877 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub068:1244812:1244877 [0] NCCL INFO comm 0x4fce69e0 rank 92 nranks 128 cudaDev 0 busId 7000 - Init COMPLETE
+gpub068:1244813:1244813 [1] NCCL INFO cudaDriverVersion 12010
+gpub068:1244813:1244813 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.168<0>
+gpub068:1244813:1244813 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub068:1244813:1244876 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.168<0>
+gpub068:1244813:1244876 [1] NCCL INFO Using network IB
+gpub068:1244813:1244876 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub068:1244813:1244876 [1] NCCL INFO Trees [0] 94/-1/-1->93->92 [1] 94/108/-1->93->92
+gpub068:1244813:1244876 [1] NCCL INFO Channel 00/0 : 93[46000] -> 94[85000] via P2P/IPC
+gpub068:1244813:1244876 [1] NCCL INFO Channel 01/0 : 93[46000] -> 94[85000] via P2P/IPC
+gpub068:1244813:1244876 [1] NCCL INFO Connected all rings
+gpub068:1244813:1244876 [1] NCCL INFO Channel 01/0 : 93[46000] -> 108[7000] [send] via NET/IB/0
+gpub068:1244813:1244876 [1] NCCL INFO Channel 01/0 : 108[7000] -> 93[46000] [receive] via NET/IB/0
+gpub068:1244813:1244876 [1] NCCL INFO Channel 00/0 : 93[46000] -> 92[7000] via P2P/IPC
+gpub068:1244813:1244876 [1] NCCL INFO Channel 01/0 : 93[46000] -> 92[7000] via P2P/IPC
+gpub068:1244813:1244876 [1] NCCL INFO Connected all trees
+gpub068:1244813:1244876 [1] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub068:1244813:1244876 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub068:1244813:1244876 [1] NCCL INFO comm 0x510f96c0 rank 93 nranks 128 cudaDev 1 busId 46000 - Init COMPLETE
+gpub015:699660:699660 [3] NCCL INFO cudaDriverVersion 12010
+gpub015:699660:699660 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.115<0>
+gpub015:699660:699660 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub015:699660:699721 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.115<0>
+gpub015:699660:699721 [3] NCCL INFO Using network IB
+gpub015:699660:699721 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub015:699660:699721 [3] NCCL INFO Trees [0] -1/-1/-1->27->26 [1] -1/-1/-1->27->26
+gpub015:699660:699721 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0
+gpub015:699660:699721 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0
+gpub015:699660:699721 [3] NCCL INFO Connected all rings
+gpub015:699660:699721 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 26[85000] via P2P/IPC
+gpub015:699660:699721 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 26[85000] via P2P/IPC
+gpub015:699660:699721 [3] NCCL INFO Connected all trees
+gpub015:699660:699721 [3] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub015:699660:699721 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub015:699660:699721 [3] NCCL INFO comm 0x4f795830 rank 27 nranks 128 cudaDev 3 busId c7000 - Init COMPLETE
+gpub085:1471919:1471919 [3] NCCL INFO cudaDriverVersion 12010
+gpub085:1471919:1471919 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.185<0>
+gpub085:1471919:1471919 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub085:1471919:1471988 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.185<0>
+gpub085:1471919:1471988 [3] NCCL INFO Using network IB
+gpub085:1471919:1471988 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub085:1471919:1471988 [3] NCCL INFO Trees [0] -1/-1/-1->111->110 [1] -1/-1/-1->111->110
+gpub085:1471919:1471988 [3] NCCL INFO Channel 00/0 : 111[c7000] -> 112[7000] [send] via NET/IB/0
+gpub085:1471919:1471988 [3] NCCL INFO Channel 01/0 : 111[c7000] -> 112[7000] [send] via NET/IB/0
+gpub085:1471919:1471988 [3] NCCL INFO Connected all rings
+gpub085:1471919:1471988 [3] NCCL INFO Channel 00/0 : 111[c7000] -> 110[85000] via P2P/IPC
+gpub085:1471919:1471988 [3] NCCL INFO Channel 01/0 : 111[c7000] -> 110[85000] via P2P/IPC
+gpub085:1471919:1471988 [3] NCCL INFO Connected all trees
+gpub085:1471919:1471988 [3] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub085:1471919:1471988 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub085:1471919:1471988 [3] NCCL INFO comm 0xb79c1300 rank 111 nranks 128 cudaDev 3 busId c7000 - Init COMPLETE
+gpub041:1218042:1218042 [3] NCCL INFO cudaDriverVersion 12010
+gpub041:1218042:1218042 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.141<0>
+gpub041:1218042:1218042 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub041:1218042:1218107 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.141<0>
+gpub041:1218042:1218107 [3] NCCL INFO Using network IB
+gpub041:1218042:1218107 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub041:1218042:1218107 [3] NCCL INFO Trees [0] -1/-1/-1->63->62 [1] -1/-1/-1->63->62
+gpub041:1218042:1218107 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 64[7000] [send] via NET/IB/0
+gpub041:1218042:1218107 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 64[7000] [send] via NET/IB/0
+gpub041:1218042:1218107 [3] NCCL INFO Connected all rings
+gpub041:1218042:1218107 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 62[85000] via P2P/IPC
+gpub041:1218042:1218107 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 62[85000] via P2P/IPC
+gpub041:1218042:1218107 [3] NCCL INFO Connected all trees
+gpub041:1218042:1218107 [3] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub041:1218042:1218107 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub041:1218042:1218107 [3] NCCL INFO comm 0x94448880 rank 63 nranks 128 cudaDev 3 busId c7000 - Init COMPLETE
+gpub058:1406056:1406056 [0] NCCL INFO cudaDriverVersion 12010
+gpub058:1406056:1406056 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.158<0>
+gpub058:1406056:1406056 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub058:1406056:1406118 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.158<0>
+gpub058:1406056:1406118 [0] NCCL INFO Using network IB
+gpub058:1406056:1406118 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub058:1406056:1406118 [0] NCCL INFO Trees [0] 65/96/-1->64->0 [1] 65/-1/-1->64->68
+gpub058:1406056:1406118 [0] NCCL INFO Channel 00/0 : 63[c7000] -> 64[7000] [receive] via NET/IB/0
+gpub058:1406056:1406118 [0] NCCL INFO Channel 01/0 : 63[c7000] -> 64[7000] [receive] via NET/IB/0
+gpub058:1406056:1406118 [0] NCCL INFO Channel 00/0 : 64[7000] -> 65[46000] via P2P/IPC
+gpub058:1406056:1406118 [0] NCCL INFO Channel 01/0 : 64[7000] -> 65[46000] via P2P/IPC
+gpub058:1406056:1406118 [0] NCCL INFO Connected all rings
+gpub058:1406056:1406118 [0] NCCL INFO Channel 01/0 : 64[7000] -> 68[7000] [send] via NET/IB/0
+gpub058:1406056:1406118 [0] NCCL INFO Channel 00/0 : 64[7000] -> 96[7000] [send] via NET/IB/0
+gpub058:1406056:1406118 [0] NCCL INFO Channel 00/0 : 0[7000] -> 64[7000] [receive] via NET/IB/0
+gpub058:1406056:1406118 [0] NCCL INFO Channel 00/0 : 64[7000] -> 0[7000] [send] via NET/IB/0
+gpub058:1406056:1406118 [0] NCCL INFO Channel 00/0 : 96[7000] -> 64[7000] [receive] via NET/IB/0
+gpub058:1406056:1406118 [0] NCCL INFO Channel 01/0 : 68[7000] -> 64[7000] [receive] via NET/IB/0
+gpub058:1406056:1406118 [0] NCCL INFO Connected all trees
+gpub058:1406056:1406118 [0] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub058:1406056:1406118 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub058:1406056:1406118 [0] NCCL INFO comm 0x9044470 rank 64 nranks 128 cudaDev 0 busId 7000 - Init COMPLETE
+gpub041:1218040:1218040 [1] NCCL INFO cudaDriverVersion 12010
+gpub041:1218040:1218040 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.141<0>
+gpub041:1218040:1218040 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub041:1218040:1218108 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.141<0>
+gpub041:1218040:1218108 [1] NCCL INFO Using network IB
+gpub041:1218040:1218108 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub041:1218040:1218108 [1] NCCL INFO Trees [0] 62/-1/-1->61->60 [1] 62/92/-1->61->60
+gpub041:1218040:1218108 [1] NCCL INFO Channel 00/0 : 61[46000] -> 62[85000] via P2P/IPC
+gpub041:1218040:1218108 [1] NCCL INFO Channel 01/0 : 61[46000] -> 62[85000] via P2P/IPC
+gpub041:1218040:1218108 [1] NCCL INFO Connected all rings
+gpub041:1218040:1218108 [1] NCCL INFO Channel 01/0 : 61[46000] -> 92[7000] [send] via NET/IB/0
+gpub041:1218040:1218108 [1] NCCL INFO Channel 01/0 : 92[7000] -> 61[46000] [receive] via NET/IB/0
+gpub015:699658:699658 [1] NCCL INFO cudaDriverVersion 12010
+gpub015:699658:699658 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.115<0>
+gpub015:699658:699658 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub015:699658:699719 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.115<0>
+gpub015:699658:699719 [1] NCCL INFO Using network IB
+gpub015:699658:699719 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub015:699658:699719 [1] NCCL INFO Trees [0] 26/20/-1->25->24 [1] 26/-1/-1->25->24
+gpub015:699658:699719 [1] NCCL INFO Channel 00/0 : 25[46000] -> 26[85000] via P2P/IPC
+gpub015:699658:699719 [1] NCCL INFO Channel 01/0 : 25[46000] -> 26[85000] via P2P/IPC
+gpub015:699658:699719 [1] NCCL INFO Connected all rings
+gpub015:699658:699719 [1] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [receive] via NET/IB/0
+gpub015:699658:699719 [1] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [send] via NET/IB/0
+gpub015:699658:699719 [1] NCCL INFO Channel 00/0 : 25[46000] -> 24[7000] via P2P/IPC
+gpub015:699658:699719 [1] NCCL INFO Channel 01/0 : 25[46000] -> 24[7000] via P2P/IPC
+gpub015:699658:699719 [1] NCCL INFO Connected all trees
+gpub015:699658:699719 [1] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub015:699658:699719 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub015:699658:699719 [1] NCCL INFO comm 0x4ff5faa0 rank 25 nranks 128 cudaDev 1 busId 46000 - Init COMPLETE
+gpub041:1218040:1218108 [1] NCCL INFO Channel 00/0 : 61[46000] -> 60[7000] via P2P/IPC
+gpub041:1218040:1218108 [1] NCCL INFO Channel 01/0 : 61[46000] -> 60[7000] via P2P/IPC
+gpub041:1218040:1218108 [1] NCCL INFO Connected all trees
+gpub041:1218040:1218108 [1] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub041:1218040:1218108 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub041:1218040:1218108 [1] NCCL INFO comm 0x8e9b8f0 rank 61 nranks 128 cudaDev 1 busId 46000 - Init COMPLETE
+gpub015:699657:699657 [0] NCCL INFO cudaDriverVersion 12010
+gpub015:699657:699657 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.115<0>
+gpub015:699657:699657 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub015:699657:699718 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.115<0>
+gpub015:699657:699718 [0] NCCL INFO Using network IB
+gpub015:699657:699718 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub015:699657:699718 [0] NCCL INFO Trees [0] 25/28/-1->24->16 [1] 25/-1/-1->24->21
+gpub015:699657:699718 [0] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0
+gpub015:699657:699718 [0] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0
+gpub015:699657:699718 [0] NCCL INFO Channel 00/0 : 24[7000] -> 25[46000] via P2P/IPC
+gpub015:699657:699718 [0] NCCL INFO Channel 01/0 : 24[7000] -> 25[46000] via P2P/IPC
+gpub015:699657:699718 [0] NCCL INFO Connected all rings
+gpub015:699657:699718 [0] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [receive] via NET/IB/0
+gpub015:699657:699718 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [send] via NET/IB/0
+gpub015:699657:699718 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [receive] via NET/IB/0
+gpub015:699657:699718 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [send] via NET/IB/0
+gpub015:699657:699718 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [receive] via NET/IB/0
+gpub015:699657:699718 [0] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [send] via NET/IB/0
+gpub015:699657:699718 [0] NCCL INFO Connected all trees
+gpub015:699657:699718 [0] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub015:699657:699718 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub015:699657:699718 [0] NCCL INFO comm 0x50bba4a0 rank 24 nranks 128 cudaDev 0 busId 7000 - Init COMPLETE
+gpub080:3990799:3990799 [0] NCCL INFO cudaDriverVersion 12010
+gpub080:3990799:3990799 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.180<0>
+gpub080:3990799:3990799 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub080:3990799:3990868 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.180<0>
+gpub080:3990799:3990868 [0] NCCL INFO Using network IB
+gpub080:3990799:3990868 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub080:3990799:3990868 [0] NCCL INFO Trees [0] 101/-1/-1->100->105 [1] 101/96/-1->100->108
+gpub080:3990799:3990868 [0] NCCL INFO Channel 00/0 : 99[c7000] -> 100[7000] [receive] via NET/IB/0
+gpub080:3990799:3990868 [0] NCCL INFO Channel 01/0 : 99[c7000] -> 100[7000] [receive] via NET/IB/0
+gpub080:3990799:3990868 [0] NCCL INFO Channel 00/0 : 100[7000] -> 101[46000] via P2P/IPC
+gpub080:3990799:3990868 [0] NCCL INFO Channel 01/0 : 100[7000] -> 101[46000] via P2P/IPC
+gpub080:3990799:3990868 [0] NCCL INFO Connected all rings
+gpub080:3990799:3990868 [0] NCCL INFO Channel 01/0 : 96[7000] -> 100[7000] [receive] via NET/IB/0
+gpub080:3990799:3990868 [0] NCCL INFO Channel 00/0 : 100[7000] -> 105[46000] [send] via NET/IB/0
+gpub080:3990799:3990868 [0] NCCL INFO Channel 01/0 : 100[7000] -> 108[7000] [send] via NET/IB/0
+gpub080:3990799:3990868 [0] NCCL INFO Channel 01/0 : 108[7000] -> 100[7000] [receive] via NET/IB/0
+gpub080:3990799:3990868 [0] NCCL INFO Channel 00/0 : 105[46000] -> 100[7000] [receive] via NET/IB/0
+gpub080:3990799:3990868 [0] NCCL INFO Channel 01/0 : 100[7000] -> 96[7000] [send] via NET/IB/0
+gpub080:3990799:3990868 [0] NCCL INFO Connected all trees
+gpub080:3990799:3990868 [0] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub080:3990799:3990868 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub080:3990799:3990868 [0] NCCL INFO comm 0x8be5af20 rank 100 nranks 128 cudaDev 0 busId 7000 - Init COMPLETE
+gpub015:699659:699659 [2] NCCL INFO cudaDriverVersion 12010
+gpub015:699659:699659 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.115<0>
+gpub015:699659:699659 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub015:699659:699720 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.115<0>
+gpub015:699659:699720 [2] NCCL INFO Using network IB
+gpub015:699659:699720 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub015:699659:699720 [2] NCCL INFO Trees [0] 27/-1/-1->26->25 [1] 27/-1/-1->26->25
+gpub015:699659:699720 [2] NCCL INFO Channel 00/0 : 26[85000] -> 27[c7000] via P2P/IPC
+gpub015:699659:699720 [2] NCCL INFO Channel 01/0 : 26[85000] -> 27[c7000] via P2P/IPC
+gpub015:699659:699720 [2] NCCL INFO Connected all rings
+gpub015:699659:699720 [2] NCCL INFO Channel 00/0 : 26[85000] -> 25[46000] via P2P/IPC
+gpub015:699659:699720 [2] NCCL INFO Channel 01/0 : 26[85000] -> 25[46000] via P2P/IPC
+gpub015:699659:699720 [2] NCCL INFO Connected all trees
+gpub015:699659:699720 [2] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub015:699659:699720 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub015:699659:699720 [2] NCCL INFO comm 0x504aba10 rank 26 nranks 128 cudaDev 2 busId 85000 - Init COMPLETE
+gpub080:3990800:3990800 [1] NCCL INFO cudaDriverVersion 12010
+gpub080:3990800:3990800 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.180<0>
+gpub080:3990800:3990800 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub080:3990800:3990871 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.180<0>
+gpub080:3990800:3990871 [1] NCCL INFO Using network IB
+gpub080:3990800:3990871 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub080:3990800:3990871 [1] NCCL INFO Trees [0] 102/-1/-1->101->100 [1] 102/104/-1->101->100
+gpub080:3990800:3990871 [1] NCCL INFO Channel 00/0 : 101[46000] -> 102[85000] via P2P/IPC
+gpub080:3990800:3990871 [1] NCCL INFO Channel 01/0 : 101[46000] -> 102[85000] via P2P/IPC
+gpub080:3990800:3990871 [1] NCCL INFO Connected all rings
+gpub080:3990800:3990871 [1] NCCL INFO Channel 01/0 : 101[46000] -> 104[7000] [send] via NET/IB/0
+gpub080:3990800:3990871 [1] NCCL INFO Channel 01/0 : 104[7000] -> 101[46000] [receive] via NET/IB/0
+gpub080:3990800:3990871 [1] NCCL INFO Channel 00/0 : 101[46000] -> 100[7000] via P2P/IPC
+gpub080:3990800:3990871 [1] NCCL INFO Channel 01/0 : 101[46000] -> 100[7000] via P2P/IPC
+gpub080:3990800:3990871 [1] NCCL INFO Connected all trees
+gpub080:3990800:3990871 [1] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub080:3990800:3990871 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub080:3990800:3990871 [1] NCCL INFO comm 0xb59713a0 rank 101 nranks 128 cudaDev 1 busId 46000 - Init COMPLETE
+gpub080:3990801:3990801 [2] NCCL INFO cudaDriverVersion 12010
+gpub080:3990801:3990801 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.180<0>
+gpub080:3990801:3990801 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub080:3990801:3990870 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.180<0>
+gpub080:3990801:3990870 [2] NCCL INFO Using network IB
+gpub080:3990801:3990870 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub080:3990801:3990870 [2] NCCL INFO Trees [0] 103/-1/-1->102->101 [1] 103/-1/-1->102->101
+gpub080:3990801:3990870 [2] NCCL INFO Channel 00/0 : 102[85000] -> 103[c7000] via P2P/IPC
+gpub080:3990801:3990870 [2] NCCL INFO Channel 01/0 : 102[85000] -> 103[c7000] via P2P/IPC
+gpub080:3990801:3990870 [2] NCCL INFO Connected all rings
+gpub080:3990801:3990870 [2] NCCL INFO Channel 00/0 : 102[85000] -> 101[46000] via P2P/IPC
+gpub080:3990801:3990870 [2] NCCL INFO Channel 01/0 : 102[85000] -> 101[46000] via P2P/IPC
+gpub080:3990801:3990870 [2] NCCL INFO Connected all trees
+gpub080:3990801:3990870 [2] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub080:3990801:3990870 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub080:3990801:3990870 [2] NCCL INFO comm 0x52129500 rank 102 nranks 128 cudaDev 2 busId 85000 - Init COMPLETE
+gpub041:1218039:1218039 [0] NCCL INFO cudaDriverVersion 12010
+gpub041:1218039:1218039 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.141<0>
+gpub041:1218039:1218039 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub041:1218039:1218106 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.141<0>
+gpub041:1218039:1218106 [0] NCCL INFO Using network IB
+gpub041:1218039:1218106 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub041:1218039:1218106 [0] NCCL INFO Trees [0] 61/-1/-1->60->56 [1] 61/28/-1->60->124
+gpub041:1218039:1218106 [0] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0
+gpub041:1218039:1218106 [0] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0
+gpub041:1218039:1218106 [0] NCCL INFO Channel 00/0 : 60[7000] -> 61[46000] via P2P/IPC
+gpub041:1218039:1218106 [0] NCCL INFO Channel 01/0 : 60[7000] -> 61[46000] via P2P/IPC
+gpub041:1218039:1218106 [0] NCCL INFO Connected all rings
+gpub041:1218039:1218106 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [receive] via NET/IB/0
+gpub041:1218039:1218106 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [receive] via NET/IB/0
+gpub041:1218039:1218106 [0] NCCL INFO Channel 01/0 : 124[7000] -> 60[7000] [receive] via NET/IB/0
+gpub041:1218039:1218106 [0] NCCL INFO Channel 01/0 : 60[7000] -> 124[7000] [send] via NET/IB/0
+gpub041:1218039:1218106 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [send] via NET/IB/0
+gpub041:1218039:1218106 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [send] via NET/IB/0
+gpub041:1218039:1218106 [0] NCCL INFO Connected all trees
+gpub041:1218039:1218106 [0] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub041:1218039:1218106 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub041:1218039:1218106 [0] NCCL INFO comm 0x520cee30 rank 60 nranks 128 cudaDev 0 busId 7000 - Init COMPLETE
+gpub041:1218041:1218041 [2] NCCL INFO cudaDriverVersion 12010
+gpub041:1218041:1218041 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.141<0>
+gpub041:1218041:1218041 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub041:1218041:1218109 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.141<0>
+gpub041:1218041:1218109 [2] NCCL INFO Using network IB
+gpub041:1218041:1218109 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub041:1218041:1218109 [2] NCCL INFO Trees [0] 63/-1/-1->62->61 [1] 63/-1/-1->62->61
+gpub041:1218041:1218109 [2] NCCL INFO Channel 00/0 : 62[85000] -> 63[c7000] via P2P/IPC
+gpub041:1218041:1218109 [2] NCCL INFO Channel 01/0 : 62[85000] -> 63[c7000] via P2P/IPC
+gpub041:1218041:1218109 [2] NCCL INFO Connected all rings
+gpub041:1218041:1218109 [2] NCCL INFO Channel 00/0 : 62[85000] -> 61[46000] via P2P/IPC
+gpub041:1218041:1218109 [2] NCCL INFO Channel 01/0 : 62[85000] -> 61[46000] via P2P/IPC
+gpub041:1218041:1218109 [2] NCCL INFO Connected all trees
+gpub041:1218041:1218109 [2] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub041:1218041:1218109 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub041:1218041:1218109 [2] NCCL INFO comm 0x8dfe3560 rank 62 nranks 128 cudaDev 2 busId 85000 - Init COMPLETE
+gpub035:2421442:2421442 [1] NCCL INFO cudaDriverVersion 12010
+gpub035:2421442:2421442 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.135<0>
+gpub035:2421442:2421442 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub035:2421442:2421509 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.135<0>
+gpub035:2421442:2421509 [1] NCCL INFO Using network IB
+gpub035:2421442:2421509 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub035:2421442:2421509 [1] NCCL INFO Trees [0] 42/36/-1->41->40 [1] 42/-1/-1->41->40
+gpub035:2421442:2421509 [1] NCCL INFO Channel 00/0 : 41[46000] -> 42[85000] via P2P/IPC
+gpub035:2421442:2421509 [1] NCCL INFO Channel 01/0 : 41[46000] -> 42[85000] via P2P/IPC
+gpub035:2421442:2421509 [1] NCCL INFO Connected all rings
+gpub035:2421442:2421509 [1] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [receive] via NET/IB/0
+gpub035:2421442:2421509 [1] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [send] via NET/IB/0
+gpub035:2421442:2421509 [1] NCCL INFO Channel 00/0 : 41[46000] -> 40[7000] via P2P/IPC
+gpub035:2421442:2421509 [1] NCCL INFO Channel 01/0 : 41[46000] -> 40[7000] via P2P/IPC
+gpub035:2421442:2421509 [1] NCCL INFO Connected all trees
+gpub035:2421442:2421509 [1] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub035:2421442:2421509 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub035:2421442:2421509 [1] NCCL INFO comm 0x9bdcb70 rank 41 nranks 128 cudaDev 1 busId 46000 - Init COMPLETE
+gpub035:2421444:2421444 [3] NCCL INFO cudaDriverVersion 12010
+gpub035:2421444:2421444 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.135<0>
+gpub035:2421444:2421444 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub035:2421444:2421508 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.135<0>
+gpub035:2421444:2421508 [3] NCCL INFO Using network IB
+gpub035:2421444:2421508 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub035:2421444:2421508 [3] NCCL INFO Trees [0] -1/-1/-1->43->42 [1] -1/-1/-1->43->42
+gpub035:2421444:2421508 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0
+gpub035:2421444:2421508 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0
+gpub035:2421444:2421508 [3] NCCL INFO Connected all rings
+gpub035:2421444:2421508 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 42[85000] via P2P/IPC
+gpub035:2421444:2421508 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 42[85000] via P2P/IPC
+gpub035:2421444:2421508 [3] NCCL INFO Connected all trees
+gpub035:2421444:2421508 [3] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub035:2421444:2421508 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub035:2421444:2421508 [3] NCCL INFO comm 0x93391d0 rank 43 nranks 128 cudaDev 3 busId c7000 - Init COMPLETE
+gpub035:2421443:2421443 [2] NCCL INFO cudaDriverVersion 12010
+gpub035:2421443:2421443 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.135<0>
+gpub035:2421443:2421443 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub035:2421443:2421507 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.135<0>
+gpub035:2421443:2421507 [2] NCCL INFO Using network IB
+gpub035:2421443:2421507 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub035:2421443:2421507 [2] NCCL INFO Trees [0] 43/-1/-1->42->41 [1] 43/-1/-1->42->41
+gpub035:2421443:2421507 [2] NCCL INFO Channel 00/0 : 42[85000] -> 43[c7000] via P2P/IPC
+gpub035:2421443:2421507 [2] NCCL INFO Channel 01/0 : 42[85000] -> 43[c7000] via P2P/IPC
+gpub035:2421443:2421507 [2] NCCL INFO Connected all rings
+gpub035:2421443:2421507 [2] NCCL INFO Channel 00/0 : 42[85000] -> 41[46000] via P2P/IPC
+gpub035:2421443:2421507 [2] NCCL INFO Channel 01/0 : 42[85000] -> 41[46000] via P2P/IPC
+gpub035:2421443:2421507 [2] NCCL INFO Connected all trees
+gpub035:2421443:2421507 [2] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub035:2421443:2421507 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub035:2421443:2421507 [2] NCCL INFO comm 0x91022d0 rank 42 nranks 128 cudaDev 2 busId 85000 - Init COMPLETE
+gpub016:1262187:1262187 [0] NCCL INFO cudaDriverVersion 12010
+gpub016:1262187:1262187 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.116<0>
+gpub016:1262187:1262187 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub016:1262187:1262261 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.116<0>
+gpub016:1262187:1262261 [0] NCCL INFO Using network IB
+gpub016:1262187:1262261 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub016:1262187:1262261 [0] NCCL INFO Trees [0] 29/-1/-1->28->24 [1] 29/12/-1->28->60
+gpub016:1262187:1262261 [0] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0
+gpub016:1262187:1262261 [0] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0
+gpub016:1262187:1262261 [0] NCCL INFO Channel 00/0 : 28[7000] -> 29[46000] via P2P/IPC
+gpub016:1262187:1262261 [0] NCCL INFO Channel 01/0 : 28[7000] -> 29[46000] via P2P/IPC
+gpub016:1262187:1262261 [0] NCCL INFO Connected all rings
+gpub016:1262187:1262261 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [receive] via NET/IB/0
+gpub016:1262187:1262261 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [receive] via NET/IB/0
+gpub016:1262187:1262261 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [send] via NET/IB/0
+gpub016:1262187:1262261 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [receive] via NET/IB/0
+gpub016:1262187:1262261 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [send] via NET/IB/0
+gpub016:1262187:1262261 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [send] via NET/IB/0
+gpub016:1262187:1262261 [0] NCCL INFO Connected all trees
+gpub016:1262187:1262261 [0] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub016:1262187:1262261 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub016:1262187:1262261 [0] NCCL INFO comm 0x2218f980 rank 28 nranks 128 cudaDev 0 busId 7000 - Init COMPLETE
+gpub016:1262190:1262190 [3] NCCL INFO cudaDriverVersion 12010
+gpub016:1262190:1262190 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.116<0>
+gpub016:1262190:1262190 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub016:1262190:1262260 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.116<0>
+gpub016:1262190:1262260 [3] NCCL INFO Using network IB
+gpub016:1262190:1262260 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub016:1262190:1262260 [3] NCCL INFO Trees [0] -1/-1/-1->31->30 [1] -1/-1/-1->31->30
+gpub016:1262190:1262260 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0
+gpub016:1262190:1262260 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0
+gpub016:1262190:1262260 [3] NCCL INFO Connected all rings
+gpub016:1262190:1262260 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 30[85000] via P2P/IPC
+gpub016:1262190:1262260 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 30[85000] via P2P/IPC
+gpub016:1262190:1262260 [3] NCCL INFO Connected all trees
+gpub016:1262190:1262260 [3] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub016:1262190:1262260 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub016:1262190:1262260 [3] NCCL INFO comm 0xa7c96f60 rank 31 nranks 128 cudaDev 3 busId c7000 - Init COMPLETE
+gpub031:1764393:1764393 [0] NCCL INFO cudaDriverVersion 12010
+gpub031:1764393:1764393 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.131<0>
+gpub031:1764393:1764393 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub031:1764393:1764461 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.131<0>
+gpub031:1764393:1764461 [0] NCCL INFO Using network IB
+gpub031:1764393:1764461 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub031:1764393:1764461 [0] NCCL INFO Trees [0] 33/48/-1->32->65 [1] 33/-1/-1->32->36
+gpub031:1764393:1764461 [0] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0
+gpub031:1764393:1764461 [0] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0
+gpub031:1764393:1764461 [0] NCCL INFO Channel 00/0 : 32[7000] -> 33[46000] via P2P/IPC
+gpub031:1764393:1764461 [0] NCCL INFO Channel 01/0 : 32[7000] -> 33[46000] via P2P/IPC
+gpub031:1764393:1764461 [0] NCCL INFO Connected all rings
+gpub031:1764393:1764461 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [send] via NET/IB/0
+gpub031:1764393:1764461 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [send] via NET/IB/0
+gpub031:1764393:1764461 [0] NCCL INFO Channel 00/0 : 32[7000] -> 65[46000] [send] via NET/IB/0
+gpub031:1764393:1764461 [0] NCCL INFO Channel 00/0 : 65[46000] -> 32[7000] [receive] via NET/IB/0
+gpub031:1764393:1764461 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [receive] via NET/IB/0
+gpub031:1764393:1764461 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [receive] via NET/IB/0
+gpub031:1764393:1764461 [0] NCCL INFO Connected all trees
+gpub031:1764393:1764461 [0] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub031:1764393:1764461 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub031:1764393:1764461 [0] NCCL INFO comm 0xb6eeceb0 rank 32 nranks 128 cudaDev 0 busId 7000 - Init COMPLETE
+gpub016:1262189:1262189 [2] NCCL INFO cudaDriverVersion 12010
+gpub016:1262189:1262189 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.116<0>
+gpub016:1262189:1262189 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub016:1262189:1262263 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.116<0>
+gpub016:1262189:1262263 [2] NCCL INFO Using network IB
+gpub016:1262189:1262263 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub016:1262189:1262263 [2] NCCL INFO Trees [0] 31/-1/-1->30->29 [1] 31/-1/-1->30->29
+gpub016:1262189:1262263 [2] NCCL INFO Channel 00/0 : 30[85000] -> 31[c7000] via P2P/IPC
+gpub016:1262189:1262263 [2] NCCL INFO Channel 01/0 : 30[85000] -> 31[c7000] via P2P/IPC
+gpub016:1262189:1262263 [2] NCCL INFO Connected all rings
+gpub016:1262189:1262263 [2] NCCL INFO Channel 00/0 : 30[85000] -> 29[46000] via P2P/IPC
+gpub016:1262189:1262263 [2] NCCL INFO Channel 01/0 : 30[85000] -> 29[46000] via P2P/IPC
+gpub016:1262189:1262263 [2] NCCL INFO Connected all trees
+gpub016:1262189:1262263 [2] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub016:1262189:1262263 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub016:1262189:1262263 [2] NCCL INFO comm 0x503afdd0 rank 30 nranks 128 cudaDev 2 busId 85000 - Init COMPLETE
+gpub011:1300883:1300883 [3] NCCL INFO cudaDriverVersion 12010
+gpub011:1300883:1300883 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.111<0>
+gpub011:1300883:1300883 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub011:1300883:1300950 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.111<0>
+gpub011:1300883:1300950 [3] NCCL INFO Using network IB
+gpub011:1300883:1300950 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub011:1300883:1300950 [3] NCCL INFO Trees [0] -1/-1/-1->11->10 [1] -1/-1/-1->11->10
+gpub011:1300883:1300950 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0
+gpub011:1300883:1300950 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0
+gpub011:1300883:1300950 [3] NCCL INFO Connected all rings
+gpub011:1300883:1300950 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 10[85000] via P2P/IPC
+gpub011:1300883:1300950 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 10[85000] via P2P/IPC
+gpub031:1764395:1764395 [2] NCCL INFO cudaDriverVersion 12010
+gpub031:1764395:1764395 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.131<0>
+gpub031:1764395:1764395 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub031:1764395:1764462 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.131<0>
+gpub031:1764395:1764462 [2] NCCL INFO Using network IB
+gpub031:1764395:1764462 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub031:1764395:1764462 [2] NCCL INFO Trees [0] 35/-1/-1->34->33 [1] 35/-1/-1->34->33 +gpub031:1764395:1764462 [2] NCCL INFO Channel 00/0 : 34[85000] -> 35[c7000] via P2P/IPC +gpub031:1764395:1764462 [2] NCCL INFO Channel 01/0 : 34[85000] -> 35[c7000] via P2P/IPC +gpub031:1764395:1764462 [2] NCCL INFO Connected all rings +gpub031:1764395:1764462 [2] NCCL INFO Channel 00/0 : 34[85000] -> 33[46000] via P2P/IPC +gpub031:1764395:1764462 [2] NCCL INFO Channel 01/0 : 34[85000] -> 33[46000] via P2P/IPC +gpub011:1300883:1300950 [3] NCCL INFO Connected all trees +gpub011:1300883:1300950 [3] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub011:1300883:1300950 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub011:1300883:1300950 [3] NCCL INFO comm 0xb778d540 rank 11 nranks 128 cudaDev 3 busId c7000 - Init COMPLETE +gpub031:1764395:1764462 [2] NCCL INFO Connected all trees +gpub031:1764395:1764462 [2] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub031:1764395:1764462 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub031:1764395:1764462 [2] NCCL INFO comm 0x8d6e3c20 rank 34 nranks 128 cudaDev 2 busId 85000 - Init COMPLETE +gpub011:1300880:1300880 [0] NCCL INFO cudaDriverVersion 12010 +gpub011:1300880:1300880 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.111<0> +gpub011:1300880:1300880 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub011:1300880:1300948 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.111<0> +gpub011:1300880:1300948 [0] NCCL INFO Using network IB +gpub011:1300880:1300948 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub011:1300880:1300948 [0] NCCL INFO Trees [0] 9/12/-1->8->17 [1] 9/-1/-1->8->5 +gpub011:1300880:1300948 [0] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0 +gpub011:1300880:1300948 [0] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0 +gpub011:1300880:1300948 [0] NCCL INFO Channel 00/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub011:1300880:1300948 [0] NCCL INFO Channel 01/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub011:1300880:1300948 [0] NCCL INFO Connected all rings +gpub011:1300880:1300948 [0] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [receive] via NET/IB/0 +gpub011:1300880:1300948 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [send] via NET/IB/0 +gpub011:1300880:1300948 [0] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [send] via NET/IB/0 +gpub011:1300880:1300948 [0] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [receive] via NET/IB/0 +gpub011:1300880:1300948 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [receive] via NET/IB/0 +gpub011:1300880:1300948 [0] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [send] via NET/IB/0 +gpub011:1300880:1300948 [0] NCCL INFO Connected all trees +gpub011:1300880:1300948 [0] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub011:1300880:1300948 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub011:1300880:1300948 [0] NCCL INFO comm 0x50df64a0 rank 8 nranks 128 cudaDev 0 busId 7000 - Init COMPLETE +gpub031:1764394:1764394 [1] NCCL INFO cudaDriverVersion 12010 +gpub031:1764394:1764394 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.131<0> +gpub031:1764394:1764394 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub031:1764394:1764464 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE 
[RO]; OOB eth1:172.28.23.131<0> +gpub031:1764394:1764464 [1] NCCL INFO Using network IB +gpub031:1764394:1764464 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub031:1764394:1764464 [1] NCCL INFO Trees [0] 34/16/-1->33->32 [1] 34/-1/-1->33->32 +gpub031:1764394:1764464 [1] NCCL INFO Channel 00/0 : 33[46000] -> 34[85000] via P2P/IPC +gpub031:1764394:1764464 [1] NCCL INFO Channel 01/0 : 33[46000] -> 34[85000] via P2P/IPC +gpub031:1764394:1764464 [1] NCCL INFO Connected all rings +gpub031:1764394:1764464 [1] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [receive] via NET/IB/0 +gpub031:1764394:1764464 [1] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [send] via NET/IB/0 +gpub031:1764394:1764464 [1] NCCL INFO Channel 00/0 : 33[46000] -> 32[7000] via P2P/IPC +gpub031:1764394:1764464 [1] NCCL INFO Channel 01/0 : 33[46000] -> 32[7000] via P2P/IPC +gpub031:1764394:1764464 [1] NCCL INFO Connected all trees +gpub031:1764394:1764464 [1] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub031:1764394:1764464 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub031:1764394:1764464 [1] NCCL INFO comm 0x514068e0 rank 33 nranks 128 cudaDev 1 busId 46000 - Init COMPLETE +gpub011:1300881:1300881 [1] NCCL INFO cudaDriverVersion 12010 +gpub011:1300881:1300881 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.111<0> +gpub011:1300881:1300881 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub011:1300881:1300947 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.111<0> +gpub011:1300881:1300947 [1] NCCL INFO Using network IB +gpub011:1300881:1300947 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub011:1300881:1300947 [1] NCCL INFO Trees [0] 10/4/-1->9->8 [1] 10/-1/-1->9->8 +gpub011:1300881:1300947 [1] NCCL INFO Channel 00/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub011:1300881:1300947 [1] NCCL INFO Channel 01/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub011:1300881:1300947 [1] NCCL INFO Connected all rings +gpub011:1300881:1300947 [1] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [receive] via NET/IB/0 +gpub011:1300881:1300947 [1] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [send] via NET/IB/0 +gpub011:1300881:1300947 [1] NCCL INFO Channel 00/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub011:1300881:1300947 [1] NCCL INFO Channel 01/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub011:1300881:1300947 [1] NCCL INFO Connected all trees +gpub011:1300881:1300947 [1] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub011:1300881:1300947 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub011:1300881:1300947 [1] NCCL INFO comm 0xb0451f0 rank 9 nranks 128 cudaDev 1 busId 46000 - Init COMPLETE +gpub031:1764396:1764396 [3] NCCL INFO cudaDriverVersion 12010 +gpub031:1764396:1764396 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.131<0> +gpub031:1764396:1764396 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub031:1764396:1764463 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.131<0> +gpub031:1764396:1764463 [3] NCCL INFO Using network IB +gpub031:1764396:1764463 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub031:1764396:1764463 [3] NCCL INFO Trees [0] -1/-1/-1->35->34 [1] -1/-1/-1->35->34 +gpub031:1764396:1764463 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0 +gpub031:1764396:1764463 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0 +gpub031:1764396:1764463 [3] 
NCCL INFO Connected all rings +gpub031:1764396:1764463 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 34[85000] via P2P/IPC +gpub031:1764396:1764463 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 34[85000] via P2P/IPC +gpub031:1764396:1764463 [3] NCCL INFO Connected all trees +gpub031:1764396:1764463 [3] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub031:1764396:1764463 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub031:1764396:1764463 [3] NCCL INFO comm 0x9dc35740 rank 35 nranks 128 cudaDev 3 busId c7000 - Init COMPLETE +gpub016:1262188:1262188 [1] NCCL INFO cudaDriverVersion 12010 +gpub016:1262188:1262188 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.116<0> +gpub016:1262188:1262188 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub016:1262188:1262262 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.116<0> +gpub016:1262188:1262262 [1] NCCL INFO Using network IB +gpub016:1262188:1262262 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub016:1262188:1262262 [1] NCCL INFO Trees [0] 30/-1/-1->29->28 [1] 30/44/-1->29->28 +gpub016:1262188:1262262 [1] NCCL INFO Channel 00/0 : 29[46000] -> 30[85000] via P2P/IPC +gpub016:1262188:1262262 [1] NCCL INFO Channel 01/0 : 29[46000] -> 30[85000] via P2P/IPC +gpub016:1262188:1262262 [1] NCCL INFO Connected all rings +gpub016:1262188:1262262 [1] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [send] via NET/IB/0 +gpub016:1262188:1262262 [1] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [receive] via NET/IB/0 +gpub016:1262188:1262262 [1] NCCL INFO Channel 00/0 : 29[46000] -> 28[7000] via P2P/IPC +gpub016:1262188:1262262 [1] NCCL INFO Channel 01/0 : 29[46000] -> 28[7000] via P2P/IPC +gpub016:1262188:1262262 [1] NCCL INFO Connected all trees +gpub016:1262188:1262262 [1] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub016:1262188:1262262 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub016:1262188:1262262 [1] NCCL INFO comm 0x8bac38c0 rank 29 nranks 128 cudaDev 1 busId 46000 - Init COMPLETE +gpub011:1300882:1300882 [2] NCCL INFO cudaDriverVersion 12010 +gpub011:1300882:1300882 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.111<0> +gpub011:1300882:1300882 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub011:1300882:1300949 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.111<0> +gpub011:1300882:1300949 [2] NCCL INFO Using network IB +gpub011:1300882:1300949 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub011:1300882:1300949 [2] NCCL INFO Trees [0] 11/-1/-1->10->9 [1] 11/-1/-1->10->9 +gpub011:1300882:1300949 [2] NCCL INFO Channel 00/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub011:1300882:1300949 [2] NCCL INFO Channel 01/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub011:1300882:1300949 [2] NCCL INFO Connected all rings +gpub011:1300882:1300949 [2] NCCL INFO Channel 00/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub011:1300882:1300949 [2] NCCL INFO Channel 01/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub011:1300882:1300949 [2] NCCL INFO Connected all trees +gpub011:1300882:1300949 [2] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub011:1300882:1300949 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub011:1300882:1300949 [2] NCCL INFO comm 0x507d2a60 rank 10 nranks 128 cudaDev 2 busId 85000 - Init COMPLETE +gpub040:1881001:1881001 [2] NCCL INFO cudaDriverVersion 12010 +gpub040:1881001:1881001 [2] NCCL 
INFO Bootstrap : Using eth1:172.28.23.140<0> +gpub040:1881001:1881001 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub040:1881001:1881063 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.140<0> +gpub040:1881001:1881063 [2] NCCL INFO Using network IB +gpub040:1881001:1881063 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub040:1881001:1881063 [2] NCCL INFO Trees [0] 59/-1/-1->58->57 [1] 59/-1/-1->58->57 +gpub040:1881001:1881063 [2] NCCL INFO Channel 00/0 : 58[85000] -> 59[c7000] via P2P/IPC +gpub040:1881001:1881063 [2] NCCL INFO Channel 01/0 : 58[85000] -> 59[c7000] via P2P/IPC +gpub040:1881001:1881063 [2] NCCL INFO Connected all rings +gpub040:1881001:1881063 [2] NCCL INFO Channel 00/0 : 58[85000] -> 57[46000] via P2P/IPC +gpub040:1881001:1881063 [2] NCCL INFO Channel 01/0 : 58[85000] -> 57[46000] via P2P/IPC +gpub040:1881001:1881063 [2] NCCL INFO Connected all trees +gpub040:1881001:1881063 [2] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub040:1881001:1881063 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub040:1881001:1881063 [2] NCCL INFO comm 0x930e4a0 rank 58 nranks 128 cudaDev 2 busId 85000 - Init COMPLETE +gpub066:1330380:1330380 [2] NCCL INFO cudaDriverVersion 12010 +gpub066:1330380:1330380 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.166<0> +gpub066:1330380:1330380 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub066:1330380:1330441 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.166<0> +gpub066:1330380:1330441 [2] NCCL INFO Using network IB +gpub066:1330380:1330441 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub066:1330380:1330441 [2] NCCL INFO Trees [0] 87/-1/-1->86->85 [1] 87/-1/-1->86->85 +gpub066:1330380:1330441 [2] NCCL INFO Channel 00/0 : 86[85000] -> 87[c7000] via P2P/IPC +gpub066:1330380:1330441 [2] NCCL INFO Channel 01/0 : 86[85000] -> 87[c7000] via P2P/IPC +gpub066:1330380:1330441 [2] NCCL INFO Connected all rings +gpub066:1330380:1330441 [2] NCCL INFO Channel 00/0 : 86[85000] -> 85[46000] via P2P/IPC +gpub066:1330380:1330441 [2] NCCL INFO Channel 01/0 : 86[85000] -> 85[46000] via P2P/IPC +gpub066:1330380:1330441 [2] NCCL INFO Connected all trees +gpub066:1330380:1330441 [2] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub066:1330380:1330441 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub066:1330380:1330441 [2] NCCL INFO comm 0xb68349c0 rank 86 nranks 128 cudaDev 2 busId 85000 - Init COMPLETE +gpub040:1881002:1881002 [3] NCCL INFO cudaDriverVersion 12010 +gpub040:1881002:1881002 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.140<0> +gpub040:1881002:1881002 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub040:1881002:1881064 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.140<0> +gpub040:1881002:1881064 [3] NCCL INFO Using network IB +gpub040:1881002:1881064 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub040:1881002:1881064 [3] NCCL INFO Trees [0] -1/-1/-1->59->58 [1] -1/-1/-1->59->58 +gpub040:1881002:1881064 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0 +gpub040:1881002:1881064 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0 +gpub040:1881002:1881064 [3] NCCL INFO Connected all rings +gpub040:1881002:1881064 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 58[85000] via P2P/IPC +gpub040:1881002:1881064 [3] 
NCCL INFO Channel 01/0 : 59[c7000] -> 58[85000] via P2P/IPC +gpub040:1881002:1881064 [3] NCCL INFO Connected all trees +gpub040:1881002:1881064 [3] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub040:1881002:1881064 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub040:1881002:1881064 [3] NCCL INFO comm 0x8c834280 rank 59 nranks 128 cudaDev 3 busId c7000 - Init COMPLETE +gpub040:1880999:1880999 [0] NCCL INFO cudaDriverVersion 12010 +gpub040:1880999:1880999 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.140<0> +gpub040:1880999:1880999 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub040:1880999:1881065 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.140<0> +gpub040:1880999:1881065 [0] NCCL INFO Using network IB +gpub040:1880999:1881065 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub040:1880999:1881065 [0] NCCL INFO Trees [0] 57/60/-1->56->48 [1] 57/-1/-1->56->53 +gpub040:1880999:1881065 [0] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0 +gpub040:1880999:1881065 [0] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0 +gpub040:1880999:1881065 [0] NCCL INFO Channel 00/0 : 56[7000] -> 57[46000] via P2P/IPC +gpub040:1880999:1881065 [0] NCCL INFO Channel 01/0 : 56[7000] -> 57[46000] via P2P/IPC +gpub040:1880999:1881065 [0] NCCL INFO Connected all rings +gpub040:1880999:1881065 [0] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [receive] via NET/IB/0 +gpub040:1880999:1881065 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [send] via NET/IB/0 +gpub040:1880999:1881065 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [receive] via NET/IB/0 +gpub040:1880999:1881065 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [send] via NET/IB/0 +gpub040:1880999:1881065 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [receive] via NET/IB/0 +gpub040:1880999:1881065 [0] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [send] via NET/IB/0 +gpub040:1880999:1881065 [0] NCCL INFO Connected all trees +gpub040:1880999:1881065 [0] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub040:1880999:1881065 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub040:1880999:1881065 [0] NCCL INFO comm 0x8da18860 rank 56 nranks 128 cudaDev 0 busId 7000 - Init COMPLETE +gpub040:1881000:1881000 [1] NCCL INFO cudaDriverVersion 12010 +gpub040:1881000:1881000 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.140<0> +gpub040:1881000:1881000 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub040:1881000:1881066 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.140<0> +gpub040:1881000:1881066 [1] NCCL INFO Using network IB +gpub040:1881000:1881066 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub040:1881000:1881066 [1] NCCL INFO Trees [0] 58/52/-1->57->56 [1] 58/-1/-1->57->56 +gpub040:1881000:1881066 [1] NCCL INFO Channel 00/0 : 57[46000] -> 58[85000] via P2P/IPC +gpub040:1881000:1881066 [1] NCCL INFO Channel 01/0 : 57[46000] -> 58[85000] via P2P/IPC +gpub040:1881000:1881066 [1] NCCL INFO Connected all rings +gpub040:1881000:1881066 [1] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [receive] via NET/IB/0 +gpub040:1881000:1881066 [1] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [send] via NET/IB/0 +gpub040:1881000:1881066 [1] NCCL INFO Channel 00/0 : 57[46000] -> 56[7000] via P2P/IPC +gpub040:1881000:1881066 [1] NCCL INFO Channel 01/0 : 57[46000] -> 56[7000] 
via P2P/IPC +gpub040:1881000:1881066 [1] NCCL INFO Connected all trees +gpub040:1881000:1881066 [1] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub040:1881000:1881066 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub040:1881000:1881066 [1] NCCL INFO comm 0x9d50600 rank 57 nranks 128 cudaDev 1 busId 46000 - Init COMPLETE +gpub066:1330381:1330381 [3] NCCL INFO cudaDriverVersion 12010 +gpub066:1330381:1330381 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.166<0> +gpub066:1330381:1330381 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub066:1330381:1330442 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.166<0> +gpub066:1330381:1330442 [3] NCCL INFO Using network IB +gpub066:1330381:1330442 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub066:1330381:1330442 [3] NCCL INFO Trees [0] -1/-1/-1->87->86 [1] -1/-1/-1->87->86 +gpub066:1330381:1330442 [3] NCCL INFO Channel 00/0 : 87[c7000] -> 88[7000] [send] via NET/IB/0 +gpub066:1330381:1330442 [3] NCCL INFO Channel 01/0 : 87[c7000] -> 88[7000] [send] via NET/IB/0 +gpub066:1330381:1330442 [3] NCCL INFO Connected all rings +gpub066:1330381:1330442 [3] NCCL INFO Channel 00/0 : 87[c7000] -> 86[85000] via P2P/IPC +gpub066:1330381:1330442 [3] NCCL INFO Channel 01/0 : 87[c7000] -> 86[85000] via P2P/IPC +gpub066:1330381:1330442 [3] NCCL INFO Connected all trees +gpub066:1330381:1330442 [3] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub066:1330381:1330442 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub066:1330381:1330442 [3] NCCL INFO comm 0x50ca2de0 rank 87 nranks 128 cudaDev 3 busId c7000 - Init COMPLETE +gpub066:1330378:1330378 [0] NCCL INFO cudaDriverVersion 12010 +gpub066:1330378:1330378 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.166<0> +gpub066:1330378:1330378 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub066:1330378:1330440 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.166<0> +gpub066:1330378:1330440 [0] NCCL INFO Using network IB +gpub066:1330378:1330440 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub066:1330378:1330440 [0] NCCL INFO Trees [0] 85/-1/-1->84->89 [1] 85/80/-1->84->77 +gpub066:1330378:1330440 [0] NCCL INFO Channel 00/0 : 83[c7000] -> 84[7000] [receive] via NET/IB/0 +gpub066:1330378:1330440 [0] NCCL INFO Channel 01/0 : 83[c7000] -> 84[7000] [receive] via NET/IB/0 +gpub066:1330378:1330440 [0] NCCL INFO Channel 00/0 : 84[7000] -> 85[46000] via P2P/IPC +gpub066:1330378:1330440 [0] NCCL INFO Channel 01/0 : 84[7000] -> 85[46000] via P2P/IPC +gpub066:1330378:1330440 [0] NCCL INFO Connected all rings +gpub066:1330378:1330440 [0] NCCL INFO Channel 01/0 : 80[7000] -> 84[7000] [receive] via NET/IB/0 +gpub066:1330378:1330440 [0] NCCL INFO Channel 00/0 : 84[7000] -> 89[46000] [send] via NET/IB/0 +gpub066:1330378:1330440 [0] NCCL INFO Channel 01/0 : 77[46000] -> 84[7000] [receive] via NET/IB/0 +gpub066:1330378:1330440 [0] NCCL INFO Channel 01/0 : 84[7000] -> 77[46000] [send] via NET/IB/0 +gpub066:1330378:1330440 [0] NCCL INFO Channel 00/0 : 89[46000] -> 84[7000] [receive] via NET/IB/0 +gpub066:1330378:1330440 [0] NCCL INFO Channel 01/0 : 84[7000] -> 80[7000] [send] via NET/IB/0 +gpub066:1330378:1330440 [0] NCCL INFO Connected all trees +gpub066:1330378:1330440 [0] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512 +gpub066:1330378:1330440 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 
+gpub066:1330379:1330379 [1] NCCL INFO cudaDriverVersion 12010
+gpub066:1330379:1330379 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.166<0>
+gpub066:1330379:1330379 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub066:1330379:1330439 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.166<0>
+gpub066:1330379:1330439 [1] NCCL INFO Using network IB
+gpub066:1330379:1330439 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub066:1330379:1330439 [1] NCCL INFO Trees [0] 86/-1/-1->85->84 [1] 86/88/-1->85->84
+gpub066:1330379:1330439 [1] NCCL INFO Channel 00/0 : 85[46000] -> 86[85000] via P2P/IPC
+gpub066:1330379:1330439 [1] NCCL INFO Channel 01/0 : 85[46000] -> 86[85000] via P2P/IPC
+gpub066:1330379:1330439 [1] NCCL INFO Connected all rings
+gpub066:1330379:1330439 [1] NCCL INFO Channel 01/0 : 85[46000] -> 88[7000] [send] via NET/IB/0
+gpub066:1330379:1330439 [1] NCCL INFO Channel 01/0 : 88[7000] -> 85[46000] [receive] via NET/IB/0
+gpub066:1330379:1330439 [1] NCCL INFO Channel 00/0 : 85[46000] -> 84[7000] via P2P/IPC
+gpub066:1330379:1330439 [1] NCCL INFO Channel 01/0 : 85[46000] -> 84[7000] via P2P/IPC
+gpub066:1330379:1330439 [1] NCCL INFO Connected all trees
+gpub066:1330379:1330439 [1] NCCL INFO threadThresholds 8/8/64 | 1024/8/64 | 512 | 512
+gpub066:1330379:1330439 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub066:1330379:1330439 [1] NCCL INFO comm 0xa42e450 rank 85 nranks 128 cudaDev 1 busId 46000 - Init COMPLETE
+[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. 
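The reducer warning above (emitted repeatedly here, by the job's many distributed ranks) is PyTorch pointing out that find_unused_parameters=True pays for an extra traversal of the autograd graph on every iteration while finding nothing unused. If every parameter provably participates in each forward pass, the flag can be dropped where the model is wrapped. A minimal sketch of the relevant PyTorch call, assuming a standard DDP setup (the surrounding trainer wiring is not shown in this log):

    import torch
    from torch.nn.parallel import DistributedDataParallel as DDP

    def wrap_model(model: torch.nn.Module, local_rank: int) -> DDP:
        # Requires torch.distributed to be initialized already.
        # find_unused_parameters=False (the default) skips the extra
        # graph traversal the warning complains about; keep it True
        # only if some iterations genuinely leave parameters unused.
        return DDP(
            model.cuda(local_rank),
            device_ids=[local_rank],
            find_unused_parameters=False,
        )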
+[gpub001:0/128] 2023-07-02 01:48:07,181 (trainer:732) INFO: 1epoch:train:1-100batch: iter_time=1.540, forward_time=0.272, loss_ctc=540.181, loss_att=397.271, acc=0.027, loss=440.144, backward_time=1.098, grad_norm=584.638, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.122, optim0_lr0=1.288e-06, train_time=4.972 +[gpub001:0/128] 2023-07-02 01:50:35,497 (trainer:732) INFO: 1epoch:train:101-200batch: iter_time=1.152e-04, forward_time=0.141, loss_ctc=462.426, loss_att=336.277, acc=0.029, loss=374.121, backward_time=1.077, grad_norm=387.500, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.121, optim0_lr0=3.788e-06, train_time=1.483 +[gpub001:0/128] 2023-07-02 01:53:03,465 (trainer:732) INFO: 1epoch:train:201-300batch: iter_time=1.085e-04, forward_time=0.142, loss_ctc=461.451, loss_att=337.298, acc=0.045, loss=374.544, backward_time=1.078, grad_norm=417.405, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.121, optim0_lr0=6.287e-06, train_time=1.479 +[gpub001:0/128] 2023-07-02 01:55:31,849 (trainer:732) INFO: 1epoch:train:301-400batch: iter_time=1.042e-04, forward_time=0.143, loss_ctc=342.808, loss_att=285.551, acc=0.064, loss=302.728, backward_time=1.080, grad_norm=643.089, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.121, optim0_lr0=8.788e-06, train_time=1.484 +[gpub001:0/128] 2023-07-02 01:55:42,501 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
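Across these records the reported loss tracks a fixed interpolation of the two objectives, loss = 0.3 * loss_ctc + 0.7 * loss_att (for the first record above, 0.3 * 540.181 + 0.7 * 397.271 = 440.144, exactly the logged loss). The 0.3/0.7 split is inferred from the logged numbers, not read from the config; a sketch of that combination:

    import torch

    def hybrid_loss(loss_ctc: torch.Tensor, loss_att: torch.Tensor,
                    ctc_weight: float = 0.3) -> torch.Tensor:
        # CTC/attention interpolation consistent with the logged values;
        # the exact weight lives in the training config (assumed here).
        return ctc_weight * loss_ctc + (1.0 - ctc_weight) * loss_att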
+[gpub001:0/128] 2023-07-02 01:56:04,392 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 01:56:08,967 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.7", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 01:56:08,967 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.7, +[gpub001:0/128] 2023-07-02 01:56:08,971 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 02:02:36,435 (trainer:732) INFO: 1epoch:train:401-500batch: iter_time=1.606, forward_time=0.145, loss_ctc=338.668, loss_att=312.612, acc=0.065, loss=320.429, backward_time=1.102, grad_norm=722.907, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.121, optim0_lr0=1.129e-05, train_time=4.246 +[gpub001:0/128] 2023-07-02 02:05:05,101 (trainer:732) INFO: 1epoch:train:501-600batch: iter_time=9.538e-05, forward_time=0.144, loss_ctc=290.690, loss_att=280.674, acc=0.090, loss=283.679, backward_time=1.078, grad_norm=573.866, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.121, optim0_lr0=1.379e-05, train_time=1.486 +[gpub001:0/128] 2023-07-02 02:07:33,618 (trainer:732) INFO: 1epoch:train:601-700batch: iter_time=9.612e-05, forward_time=0.143, loss_ctc=302.811, loss_att=317.380, acc=0.118, loss=313.009, backward_time=1.077, grad_norm=525.059, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.121, optim0_lr0=1.629e-05, train_time=1.485 +[gpub001:0/128] 2023-07-02 02:10:02,081 (trainer:732) INFO: 1epoch:train:701-800batch: iter_time=9.252e-05, forward_time=0.143, loss_ctc=263.886, loss_att=258.073, acc=0.153, loss=259.817, backward_time=1.077, grad_norm=413.463, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.121, optim0_lr0=1.879e-05, train_time=1.484 +[gpub001:0/128] 2023-07-02 02:10:03,693 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
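Each "Building Nth iter-factory" line marks the trainer moving on to another of the ten pre-split shards (split.0-split.9), constructing a fresh dataset and batch sampler for it; that construction cost is why iter_time spikes to roughly 1.5-2.5 s on the first batch after every build. A rough sketch of the shard-cycling idea only; the shard ordering and builder are illustrative, not ESPnet's actual multiple_iter_factory internals:

    from typing import Callable, Iterable, Iterator, Sequence

    def cycle_shards(build_iter: Callable[[str], Iterable],
                     shards: Sequence[str]) -> Iterator:
        # One iterator per shard, built lazily in the given order;
        # batches stream out of each shard before the next is built.
        for shard in shards:
            yield from build_iter(shard)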
+[gpub001:0/128] 2023-07-02 02:10:25,856 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 02:10:30,152 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.5", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 02:10:30,152 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.5, +[gpub001:0/128] 2023-07-02 02:10:30,156 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 02:18:38,072 (trainer:732) INFO: 1epoch:train:801-900batch: iter_time=1.499, forward_time=0.145, loss_ctc=297.783, loss_att=277.425, acc=0.155, loss=283.533, backward_time=1.202, grad_norm=432.564, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.121, optim0_lr0=2.129e-05, train_time=5.160 +[gpub001:0/128] 2023-07-02 02:21:09,704 (trainer:732) INFO: 1epoch:train:901-1000batch: iter_time=9.771e-05, forward_time=0.144, loss_ctc=274.105, loss_att=250.004, acc=0.168, loss=257.234, backward_time=1.078, grad_norm=408.850, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.121, optim0_lr0=2.379e-05, train_time=1.516 +[gpub001:0/128] 2023-07-02 02:23:38,330 (trainer:732) INFO: 1epoch:train:1001-1100batch: iter_time=1.074e-04, forward_time=0.143, loss_ctc=293.450, loss_att=288.752, acc=0.156, loss=290.161, backward_time=1.078, grad_norm=433.379, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.121, optim0_lr0=2.629e-05, train_time=1.486 +[gpub001:0/128] 2023-07-02 02:26:07,352 (trainer:732) INFO: 1epoch:train:1101-1200batch: iter_time=9.141e-05, forward_time=0.144, loss_ctc=254.103, loss_att=237.304, acc=0.175, loss=242.344, backward_time=1.079, grad_norm=378.541, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.121, optim0_lr0=2.879e-05, train_time=1.490 +[gpub001:0/128] 2023-07-02 02:26:09,180 (multiple_iter_factory:32) INFO: Building 3th iter-factory... 
+[gpub001:0/128] 2023-07-02 02:26:31,466 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 02:26:35,764 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.6", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 02:26:35,764 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.6, +[gpub001:0/128] 2023-07-02 02:26:35,768 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 02:33:43,766 (trainer:732) INFO: 1epoch:train:1201-1300batch: iter_time=1.566, forward_time=0.182, loss_ctc=284.394, loss_att=260.556, acc=0.170, loss=267.707, backward_time=1.096, grad_norm=319.756, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.123, optim0_lr0=3.129e-05, train_time=4.564 +[gpub001:0/128] 2023-07-02 02:36:17,719 (trainer:732) INFO: 1epoch:train:1301-1400batch: iter_time=1.084e-04, forward_time=0.145, loss_ctc=264.134, loss_att=232.279, acc=0.185, loss=241.835, backward_time=1.082, grad_norm=322.456, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.121, optim0_lr0=3.379e-05, train_time=1.539 +[gpub001:0/128] 2023-07-02 02:38:56,271 (trainer:732) INFO: 1epoch:train:1401-1500batch: iter_time=9.806e-05, forward_time=0.145, loss_ctc=283.768, loss_att=254.155, acc=0.174, loss=263.039, backward_time=1.085, grad_norm=323.685, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.121, optim0_lr0=3.629e-05, train_time=1.585 +[gpub001:0/128] 2023-07-02 02:41:27,198 (trainer:732) INFO: 1epoch:train:1501-1600batch: iter_time=9.429e-05, forward_time=0.145, loss_ctc=248.736, loss_att=223.280, acc=0.187, loss=230.917, backward_time=1.079, grad_norm=295.086, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.121, optim0_lr0=3.879e-05, train_time=1.509 +[gpub001:0/128] 2023-07-02 02:41:41,817 (multiple_iter_factory:32) INFO: Building 4th iter-factory... 
+[gpub001:0/128] 2023-07-02 02:42:03,909 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 02:42:08,170 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.1", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 02:42:08,170 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.1, +[gpub001:0/128] 2023-07-02 02:42:08,184 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 02:47:44,894 (trainer:732) INFO: 1epoch:train:1601-1700batch: iter_time=2.210, forward_time=0.168, loss_ctc=277.204, loss_att=247.225, acc=0.188, loss=256.219, backward_time=1.098, grad_norm=299.571, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.121, optim0_lr0=4.129e-05, train_time=3.776 +[gpub001:0/128] 2023-07-02 02:50:21,446 (trainer:732) INFO: 1epoch:train:1701-1800batch: iter_time=8.222e-05, forward_time=0.144, loss_ctc=256.731, loss_att=222.419, acc=0.198, loss=232.713, backward_time=1.097, grad_norm=275.648, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.121, optim0_lr0=4.379e-05, train_time=1.566 +[gpub001:0/128] 2023-07-02 02:53:02,939 (trainer:732) INFO: 1epoch:train:1801-1900batch: iter_time=8.187e-05, forward_time=0.144, loss_ctc=274.581, loss_att=254.361, acc=0.182, loss=260.427, backward_time=1.114, grad_norm=313.056, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.121, optim0_lr0=4.629e-05, train_time=1.615 +[gpub001:0/128] 2023-07-02 02:55:42,853 (trainer:732) INFO: 1epoch:train:1901-2000batch: iter_time=8.359e-05, forward_time=0.143, loss_ctc=236.100, loss_att=214.394, acc=0.200, loss=220.906, backward_time=1.087, grad_norm=255.461, clip=100.000, loss_scale=6.554e+04, optim_step_time=0.121, optim0_lr0=4.879e-05, train_time=1.599 +[gpub001:0/128] 2023-07-02 02:55:53,991 (multiple_iter_factory:32) INFO: Building 5th iter-factory... 
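optim0_lr0 in these records climbs by almost exactly 2.5e-06 every 100 batches, i.e. a linear warmup of 2.5e-08 per step; extrapolated, that slope reaches 2.5e-04 at step 10,000. A minimal sketch of such a linear warmup (peak and warmup length are read off that slope, and whatever decay follows the warmup is not visible this early in the log):

    def warmup_lr(step: int, peak_lr: float = 2.5e-4,
                  warmup_steps: int = 10_000) -> float:
        # Linear ramp matching the logged optim0_lr0 increments
        # (2.5e-08 per step). Post-warmup behaviour is a placeholder.
        if step < warmup_steps:
            return peak_lr * step / warmup_steps
        return peak_lr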
+[gpub001:0/128] 2023-07-02 02:56:16,340 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 02:56:20,644 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.4", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 02:56:20,644 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.4, +[gpub001:0/128] 2023-07-02 02:56:20,648 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 03:02:12,674 (trainer:732) INFO: 1epoch:train:2001-2100batch: iter_time=1.648, forward_time=0.145, loss_ctc=255.392, loss_att=234.231, acc=0.196, loss=240.580, backward_time=1.102, grad_norm=254.801, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.121, optim0_lr0=5.129e-05, train_time=3.898 +[gpub001:0/128] 2023-07-02 03:04:41,291 (trainer:732) INFO: 1epoch:train:2101-2200batch: iter_time=8.832e-05, forward_time=0.145, loss_ctc=228.533, loss_att=210.197, acc=0.210, loss=215.698, backward_time=1.080, grad_norm=265.408, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.121, optim0_lr0=5.379e-05, train_time=1.486 +[gpub001:0/128] 2023-07-02 03:07:09,624 (trainer:732) INFO: 1epoch:train:2201-2300batch: iter_time=9.138e-05, forward_time=0.144, loss_ctc=231.701, loss_att=230.702, acc=0.198, loss=231.002, backward_time=1.079, grad_norm=283.955, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.121, optim0_lr0=5.629e-05, train_time=1.483 +[gpub001:0/128] 2023-07-02 03:09:49,400 (trainer:732) INFO: 1epoch:train:2301-2400batch: iter_time=8.965e-05, forward_time=0.144, loss_ctc=196.101, loss_att=202.117, acc=0.215, loss=200.312, backward_time=1.099, grad_norm=248.405, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.121, optim0_lr0=5.879e-05, train_time=1.598 +[gpub001:0/128] 2023-07-02 03:09:51,010 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
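loss_scale sits at 6.554e+04 (2^16) for the first ~2000 batches and doubles to 1.311e+05 at batch 2001-2100 (it doubles again at roughly the same cadence further down: 2.621e+05, 5.243e+05, 1.049e+06). That matches the default dynamic loss scaling of PyTorch AMP's GradScaler: start at 65536, double after every 2000 overflow-free steps, halve on overflow. A sketch, assuming the stock scaler is in use:

    import torch

    scaler = torch.cuda.amp.GradScaler(
        init_scale=65536.0,    # 6.554e+04, the first logged loss_scale
        growth_factor=2.0,     # doubling seen at batches ~2000, ~4000, ...
        backoff_factor=0.5,    # halve whenever a scaled step overflows
        growth_interval=2000,  # matches the 2000-step cadence above
    )
    # Per step (loss/optimizer come from the surrounding trainer):
    #   scaler.scale(loss).backward()
    #   scaler.step(optimizer)
    #   scaler.update()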
+[gpub001:0/128] 2023-07-02 03:10:13,711 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 03:10:18,017 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.3", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 03:10:18,018 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.3, +[gpub001:0/128] 2023-07-02 03:10:18,021 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 03:18:03,936 (trainer:732) INFO: 1epoch:train:2401-2500batch: iter_time=1.551, forward_time=0.146, loss_ctc=209.517, loss_att=221.916, acc=0.228, loss=218.197, backward_time=1.101, grad_norm=277.157, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.121, optim0_lr0=6.129e-05, train_time=4.945 +[gpub001:0/128] 2023-07-02 03:20:32,902 (trainer:732) INFO: 1epoch:train:2501-2600batch: iter_time=1.010e-04, forward_time=0.146, loss_ctc=197.334, loss_att=192.816, acc=0.252, loss=194.171, backward_time=1.080, grad_norm=292.260, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.121, optim0_lr0=6.379e-05, train_time=1.489 +[gpub001:0/128] 2023-07-02 03:23:26,427 (trainer:732) INFO: 1epoch:train:2601-2700batch: iter_time=1.025e-04, forward_time=0.145, loss_ctc=194.768, loss_att=218.914, acc=0.251, loss=211.670, backward_time=1.099, grad_norm=328.740, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.121, optim0_lr0=6.629e-05, train_time=1.735 +[gpub001:0/128] 2023-07-02 03:25:58,159 (trainer:732) INFO: 1epoch:train:2701-2800batch: iter_time=9.939e-05, forward_time=0.146, loss_ctc=167.623, loss_att=168.951, acc=0.295, loss=168.553, backward_time=1.081, grad_norm=241.927, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.121, optim0_lr0=6.879e-05, train_time=1.517 +[gpub001:0/128] 2023-07-02 03:26:07,547 (multiple_iter_factory:32) INFO: Building 7th iter-factory... 
+[gpub001:0/128] 2023-07-02 03:26:29,813 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 03:26:34,386 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.9", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 03:26:34,386 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.9, +[gpub001:0/128] 2023-07-02 03:26:34,390 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 03:31:56,946 (trainer:732) INFO: 1epoch:train:2801-2900batch: iter_time=1.619, forward_time=0.172, loss_ctc=176.463, loss_att=177.295, acc=0.315, loss=177.046, backward_time=1.103, grad_norm=201.912, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.122, optim0_lr0=7.129e-05, train_time=3.588 +[gpub001:0/128] 2023-07-02 03:34:26,090 (trainer:732) INFO: 1epoch:train:2901-3000batch: iter_time=8.798e-05, forward_time=0.144, loss_ctc=169.381, loss_att=157.379, acc=0.328, loss=160.979, backward_time=1.077, grad_norm=221.600, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.121, optim0_lr0=7.379e-05, train_time=1.491 +[gpub001:0/128] 2023-07-02 03:36:55,616 (trainer:732) INFO: 1epoch:train:3001-3100batch: iter_time=9.043e-05, forward_time=0.146, loss_ctc=164.817, loss_att=190.673, acc=0.310, loss=182.916, backward_time=1.082, grad_norm=242.341, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.121, optim0_lr0=7.629e-05, train_time=1.495 +[gpub001:0/128] 2023-07-02 03:39:26,502 (trainer:732) INFO: 1epoch:train:3101-3200batch: iter_time=9.068e-05, forward_time=0.144, loss_ctc=152.565, loss_att=148.213, acc=0.340, loss=149.518, backward_time=1.083, grad_norm=231.027, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.121, optim0_lr0=7.879e-05, train_time=1.509 +[gpub001:0/128] 2023-07-02 03:39:30,891 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub001:0/128] 2023-07-02 03:39:53,618 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 03:39:57,929 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.0", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 03:39:57,929 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.0, +[gpub001:0/128] 2023-07-02 03:39:57,932 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 03:47:03,652 (trainer:732) INFO: 1epoch:train:3201-3300batch: iter_time=2.099, forward_time=0.144, loss_ctc=155.601, loss_att=164.815, acc=0.345, loss=162.051, backward_time=1.111, grad_norm=210.374, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.121, optim0_lr0=8.129e-05, train_time=4.571 +[gpub001:0/128] 2023-07-02 03:49:32,184 (trainer:732) INFO: 1epoch:train:3301-3400batch: iter_time=1.183e-04, forward_time=0.144, loss_ctc=152.491, loss_att=143.312, acc=0.360, loss=146.066, backward_time=1.076, grad_norm=205.020, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.121, optim0_lr0=8.379e-05, train_time=1.486 +[gpub001:0/128] 2023-07-02 03:52:01,611 (trainer:732) INFO: 1epoch:train:3401-3500batch: iter_time=1.060e-04, forward_time=0.145, loss_ctc=148.864, loss_att=168.528, acc=0.335, loss=162.629, backward_time=1.078, grad_norm=210.197, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.121, optim0_lr0=8.629e-05, train_time=1.494 +[gpub001:0/128] 2023-07-02 03:54:31,233 (trainer:732) INFO: 1epoch:train:3501-3600batch: iter_time=9.133e-05, forward_time=0.145, loss_ctc=135.953, loss_att=134.611, acc=0.371, loss=135.014, backward_time=1.079, grad_norm=179.182, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.121, optim0_lr0=8.879e-05, train_time=1.496 +[gpub001:0/128] 2023-07-02 03:54:40,963 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub001:0/128] 2023-07-02 03:55:03,510 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 03:55:07,768 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.8", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 03:55:07,768 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.8, +[gpub001:0/128] 2023-07-02 03:55:07,772 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 04:02:55,539 (trainer:732) INFO: 1epoch:train:3601-3700batch: iter_time=1.634, forward_time=0.172, loss_ctc=141.664, loss_att=149.355, acc=0.379, loss=147.048, backward_time=1.096, grad_norm=213.922, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.121, optim0_lr0=9.129e-05, train_time=5.043 +[gpub001:0/128] 2023-07-02 04:05:24,077 (trainer:732) INFO: 1epoch:train:3701-3800batch: iter_time=1.136e-04, forward_time=0.145, loss_ctc=140.225, loss_att=130.232, acc=0.390, loss=133.230, backward_time=1.077, grad_norm=217.100, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.121, optim0_lr0=9.379e-05, train_time=1.485 +[gpub001:0/128] 2023-07-02 04:07:53,148 (trainer:732) INFO: 1epoch:train:3801-3900batch: iter_time=1.102e-04, forward_time=0.144, loss_ctc=137.165, loss_att=157.511, acc=0.355, loss=151.407, backward_time=1.077, grad_norm=196.286, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.121, optim0_lr0=9.629e-05, train_time=1.490 +[gpub001:0/128] 2023-07-02 04:10:39,053 (trainer:732) INFO: 1epoch:train:3901-4000batch: iter_time=9.469e-05, forward_time=0.144, loss_ctc=124.160, loss_att=126.918, acc=0.390, loss=126.091, backward_time=1.090, grad_norm=241.832, clip=100.000, loss_scale=1.311e+05, optim_step_time=0.121, optim0_lr0=9.879e-05, train_time=1.659 +[gpub001:0/128] 2023-07-02 04:20:41,655 (trainer:338) INFO: 1epoch results: [train] iter_time=0.424, forward_time=0.150, loss_ctc=243.208, loss_att=225.399, acc=0.220, loss=230.742, backward_time=1.090, grad_norm=327.236, clip=100.000, loss_scale=9.830e+04, optim_step_time=0.121, optim0_lr0=5.004e-05, train_time=2.262, time=2 hours, 31 minutes and 2.4 seconds, total_count=4000, gpu_max_cached_mem_GB=33.912, [valid] loss_ctc=122.230, cer_ctc=0.606, loss_att=129.277, acc=0.275, cer=0.670, wer=1.000, loss=127.163, time=3 minutes and 57.44 seconds, total_count=506, gpu_max_cached_mem_GB=37.207, [att_plot] time=5 minutes and 51.9 seconds, total_count=0, gpu_max_cached_mem_GB=37.207 +[gpub001:0/128] 2023-07-02 04:20:57,442 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub001:0/128] 2023-07-02 04:20:57,442 (trainer:272) INFO: 2/100epoch started. Estimated time to finish: 1 week, 4 days and 1 hour +[gpub001:0/128] 2023-07-02 04:20:57,445 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
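The finish estimate is plain extrapolation: epoch 1 cost about 2 h 31 m of training plus ~4 m of validation and ~6 m of attention plots, roughly 2.7 h in all, and 99 remaining epochs at that pace is ~265 h, i.e. the printed "1 week, 4 days and 1 hour". A sketch of that arithmetic (function name illustrative):

    import datetime

    def estimated_time_to_finish(epoch_seconds: float, epochs_done: int,
                                 max_epoch: int = 100) -> datetime.timedelta:
        # Remaining wall-clock = average epoch duration * epochs left.
        return datetime.timedelta(
            seconds=epoch_seconds * (max_epoch - epochs_done))

    print(estimated_time_to_finish(2 * 3600 + 41 * 60, epochs_done=1))
    # -> 11 days, 1:39:00  (about "1 week, 4 days and 1 hour")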
+[gpub001:0/128] 2023-07-02 04:21:19,393 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 04:21:23,457 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.6", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 04:21:23,458 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.6, +[gpub001:0/128] 2023-07-02 04:21:23,461 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 04:26:11,570 (trainer:732) INFO: 2epoch:train:1-100batch: iter_time=1.523, forward_time=0.165, loss_ctc=141.462, loss_att=147.694, acc=0.365, loss=145.825, backward_time=1.111, grad_norm=180.767, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.122, optim0_lr0=1.013e-04, train_time=3.141 +[gpub001:0/128] 2023-07-02 04:28:54,092 (trainer:732) INFO: 2epoch:train:101-200batch: iter_time=1.010e-04, forward_time=0.146, loss_ctc=127.915, loss_att=134.951, acc=0.409, loss=132.840, backward_time=1.103, grad_norm=154.665, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.121, optim0_lr0=1.038e-04, train_time=1.625 +[gpub001:0/128] 2023-07-02 04:31:31,556 (trainer:732) INFO: 2epoch:train:201-300batch: iter_time=9.197e-05, forward_time=0.200, loss_ctc=125.885, loss_att=122.785, acc=0.379, loss=123.715, backward_time=1.092, grad_norm=178.909, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.124, optim0_lr0=1.063e-04, train_time=1.574 +[gpub001:0/128] 2023-07-02 04:34:11,293 (trainer:732) INFO: 2epoch:train:301-400batch: iter_time=9.427e-05, forward_time=0.147, loss_ctc=121.684, loss_att=132.872, acc=0.377, loss=129.516, backward_time=1.105, grad_norm=197.046, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.121, optim0_lr0=1.088e-04, train_time=1.597 +[gpub001:0/128] 2023-07-02 04:34:19,159 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub001:0/128] 2023-07-02 04:34:40,978 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 04:34:45,187 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.1", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 04:34:45,187 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.1, +[gpub001:0/128] 2023-07-02 04:34:45,191 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 04:42:26,425 (trainer:732) INFO: 2epoch:train:401-500batch: iter_time=2.464, forward_time=0.183, loss_ctc=133.533, loss_att=141.745, acc=0.391, loss=139.281, backward_time=1.100, grad_norm=177.034, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.122, optim0_lr0=1.113e-04, train_time=4.951 +[gpub001:0/128] 2023-07-02 04:44:57,986 (trainer:732) INFO: 2epoch:train:501-600batch: iter_time=1.026e-04, forward_time=0.146, loss_ctc=117.575, loss_att=129.798, acc=0.436, loss=126.131, backward_time=1.087, grad_norm=157.194, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.121, optim0_lr0=1.138e-04, train_time=1.516 +[gpub001:0/128] 2023-07-02 04:47:30,329 (trainer:732) INFO: 2epoch:train:601-700batch: iter_time=1.003e-04, forward_time=0.146, loss_ctc=114.442, loss_att=114.728, acc=0.414, loss=114.642, backward_time=1.082, grad_norm=169.206, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.121, optim0_lr0=1.163e-04, train_time=1.523 +[gpub001:0/128] 2023-07-02 04:50:12,389 (trainer:732) INFO: 2epoch:train:701-800batch: iter_time=9.729e-05, forward_time=0.145, loss_ctc=117.962, loss_att=130.095, acc=0.398, loss=126.455, backward_time=1.107, grad_norm=202.411, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.121, optim0_lr0=1.188e-04, train_time=1.620 +[gpub001:0/128] 2023-07-02 04:50:19,407 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub001:0/128] 2023-07-02 04:50:41,623 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 04:50:45,945 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.5", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 04:50:45,945 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.5, +[gpub001:0/128] 2023-07-02 04:50:45,949 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 04:58:45,325 (trainer:732) INFO: 2epoch:train:801-900batch: iter_time=1.604, forward_time=0.186, loss_ctc=126.010, loss_att=132.556, acc=0.414, loss=130.592, backward_time=1.105, grad_norm=175.243, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.121, optim0_lr0=1.213e-04, train_time=5.129 +[gpub001:0/128] 2023-07-02 05:01:17,736 (trainer:732) INFO: 2epoch:train:901-1000batch: iter_time=9.383e-05, forward_time=0.149, loss_ctc=109.712, loss_att=118.287, acc=0.466, loss=115.715, backward_time=1.088, grad_norm=142.149, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.121, optim0_lr0=1.238e-04, train_time=1.524 +[gpub001:0/128] 2023-07-02 05:03:47,575 (trainer:732) INFO: 2epoch:train:1001-1100batch: iter_time=1.014e-04, forward_time=0.147, loss_ctc=110.053, loss_att=106.480, acc=0.439, loss=107.552, backward_time=1.086, grad_norm=153.951, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.121, optim0_lr0=1.263e-04, train_time=1.498 +[gpub001:0/128] 2023-07-02 05:06:16,384 (trainer:732) INFO: 2epoch:train:1101-1200batch: iter_time=9.267e-05, forward_time=0.147, loss_ctc=107.375, loss_att=120.439, acc=0.425, loss=116.520, backward_time=1.082, grad_norm=193.914, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.121, optim0_lr0=1.288e-04, train_time=1.488 +[gpub001:0/128] 2023-07-02 05:06:27,229 (multiple_iter_factory:32) INFO: Building 3th iter-factory... 
+[gpub001:0/128] 2023-07-02 05:06:49,654 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 05:06:54,182 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.9", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 05:06:54,182 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.9, +[gpub001:0/128] 2023-07-02 05:06:54,186 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 05:13:53,395 (trainer:732) INFO: 2epoch:train:1201-1300batch: iter_time=2.083, forward_time=0.188, loss_ctc=119.429, loss_att=122.178, acc=0.442, loss=121.353, backward_time=1.161, grad_norm=211.966, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.122, optim0_lr0=1.313e-04, train_time=4.569 +[gpub001:0/128] 2023-07-02 05:16:51,682 (trainer:732) INFO: 2epoch:train:1301-1400batch: iter_time=8.730e-05, forward_time=0.148, loss_ctc=105.735, loss_att=111.683, acc=0.485, loss=109.899, backward_time=1.179, grad_norm=138.234, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.121, optim0_lr0=1.338e-04, train_time=1.784 +[gpub001:0/128] 2023-07-02 05:19:35,325 (trainer:732) INFO: 2epoch:train:1401-1500batch: iter_time=9.301e-05, forward_time=0.145, loss_ctc=104.633, loss_att=99.671, acc=0.460, loss=101.159, backward_time=1.106, grad_norm=170.937, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.121, optim0_lr0=1.363e-04, train_time=1.636 +[gpub001:0/128] 2023-07-02 05:22:31,703 (trainer:732) INFO: 2epoch:train:1501-1600batch: iter_time=9.055e-05, forward_time=0.146, loss_ctc=102.772, loss_att=110.998, acc=0.455, loss=108.530, backward_time=1.109, grad_norm=160.641, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.121, optim0_lr0=1.388e-04, train_time=1.764 +[gpub001:0/128] 2023-07-02 05:22:33,982 (multiple_iter_factory:32) INFO: Building 4th iter-factory... 
+[gpub001:0/128] 2023-07-02 05:22:56,271 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 05:23:00,511 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.0", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 05:23:00,511 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.0, +[gpub001:0/128] 2023-07-02 05:23:00,515 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 05:29:35,230 (trainer:732) INFO: 2epoch:train:1601-1700batch: iter_time=1.551, forward_time=0.149, loss_ctc=113.295, loss_att=115.104, acc=0.459, loss=114.561, backward_time=1.108, grad_norm=155.653, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.121, optim0_lr0=1.413e-04, train_time=4.235 +[gpub001:0/128] 2023-07-02 05:32:13,472 (trainer:732) INFO: 2epoch:train:1701-1800batch: iter_time=1.052e-04, forward_time=0.146, loss_ctc=103.800, loss_att=106.712, acc=0.499, loss=105.838, backward_time=1.096, grad_norm=148.694, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.121, optim0_lr0=1.438e-04, train_time=1.582 +[gpub001:0/128] 2023-07-02 05:34:44,526 (trainer:732) INFO: 2epoch:train:1801-1900batch: iter_time=1.072e-04, forward_time=0.147, loss_ctc=101.364, loss_att=92.579, acc=0.479, loss=95.215, backward_time=1.082, grad_norm=163.286, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.121, optim0_lr0=1.463e-04, train_time=1.510 +[gpub001:0/128] 2023-07-02 05:37:18,235 (trainer:732) INFO: 2epoch:train:1901-2000batch: iter_time=9.932e-05, forward_time=0.146, loss_ctc=102.646, loss_att=106.142, acc=0.463, loss=105.093, backward_time=1.095, grad_norm=182.571, clip=100.000, loss_scale=2.621e+05, optim_step_time=0.121, optim0_lr0=1.488e-04, train_time=1.537 +[gpub001:0/128] 2023-07-02 05:37:19,899 (multiple_iter_factory:32) INFO: Building 5th iter-factory... 
+[gpub001:0/128] 2023-07-02 05:37:42,251 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 05:37:46,475 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.4", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 05:37:46,475 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.4, +[gpub001:0/128] 2023-07-02 05:37:46,479 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 05:48:10,631 (trainer:732) INFO: 2epoch:train:2001-2100batch: iter_time=1.530, forward_time=0.146, loss_ctc=116.459, loss_att=113.836, acc=0.467, loss=114.623, backward_time=1.101, grad_norm=190.683, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.121, optim0_lr0=1.513e-04, train_time=6.524 +[gpub001:0/128] 2023-07-02 05:50:53,948 (trainer:732) INFO: 2epoch:train:2101-2200batch: iter_time=9.630e-05, forward_time=0.149, loss_ctc=98.615, loss_att=98.760, acc=0.520, loss=98.716, backward_time=1.108, grad_norm=133.124, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.121, optim0_lr0=1.538e-04, train_time=1.633 +[gpub001:0/128] 2023-07-02 05:53:22,527 (trainer:732) INFO: 2epoch:train:2201-2300batch: iter_time=1.008e-04, forward_time=0.146, loss_ctc=96.354, loss_att=87.441, acc=0.497, loss=90.115, backward_time=1.080, grad_norm=137.996, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.121, optim0_lr0=1.563e-04, train_time=1.486 +[gpub001:0/128] 2023-07-02 05:55:51,147 (trainer:732) INFO: 2epoch:train:2301-2400batch: iter_time=9.314e-05, forward_time=0.147, loss_ctc=97.507, loss_att=99.440, acc=0.485, loss=98.860, backward_time=1.081, grad_norm=204.800, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.121, optim0_lr0=1.588e-04, train_time=1.486 +[gpub001:0/128] 2023-07-02 05:55:53,085 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub001:0/128] 2023-07-02 05:56:15,243 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 05:56:19,560 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.7", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 05:56:19,560 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.7, +[gpub001:0/128] 2023-07-02 05:56:19,564 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 06:03:08,333 (trainer:732) INFO: 2epoch:train:2401-2500batch: iter_time=1.611, forward_time=0.178, loss_ctc=108.028, loss_att=109.475, acc=0.488, loss=109.041, backward_time=1.104, grad_norm=160.868, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.122, optim0_lr0=1.613e-04, train_time=4.371 +[gpub001:0/128] 2023-07-02 06:05:38,294 (trainer:732) INFO: 2epoch:train:2501-2600batch: iter_time=9.944e-05, forward_time=0.147, loss_ctc=100.030, loss_att=100.384, acc=0.532, loss=100.278, backward_time=1.086, grad_norm=166.489, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.121, optim0_lr0=1.638e-04, train_time=1.500 +[gpub001:0/128] 2023-07-02 06:08:07,446 (trainer:732) INFO: 2epoch:train:2601-2700batch: iter_time=1.010e-04, forward_time=0.147, loss_ctc=95.719, loss_att=86.792, acc=0.515, loss=89.470, backward_time=1.082, grad_norm=137.126, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.121, optim0_lr0=1.663e-04, train_time=1.491 +[gpub001:0/128] 2023-07-02 06:10:36,538 (trainer:732) INFO: 2epoch:train:2701-2800batch: iter_time=1.025e-04, forward_time=0.148, loss_ctc=95.055, loss_att=99.392, acc=0.503, loss=98.091, backward_time=1.084, grad_norm=141.816, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.121, optim0_lr0=1.688e-04, train_time=1.491 +[gpub001:0/128] 2023-07-02 06:10:46,481 (multiple_iter_factory:32) INFO: Building 7th iter-factory... 
+[gpub001:0/128] 2023-07-02 06:11:08,639 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 06:11:12,893 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.3", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 06:11:12,893 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.3, +[gpub001:0/128] 2023-07-02 06:11:12,897 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 06:18:12,047 (trainer:732) INFO: 2epoch:train:2801-2900batch: iter_time=2.408, forward_time=0.148, loss_ctc=105.117, loss_att=104.174, acc=0.506, loss=104.457, backward_time=1.103, grad_norm=133.346, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.121, optim0_lr0=1.713e-04, train_time=4.555 +[gpub001:0/128] 2023-07-02 06:20:42,419 (trainer:732) INFO: 2epoch:train:2901-3000batch: iter_time=1.216e-04, forward_time=0.145, loss_ctc=97.400, loss_att=94.758, acc=0.546, loss=95.551, backward_time=1.086, grad_norm=188.040, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.121, optim0_lr0=1.738e-04, train_time=1.504 +[gpub001:0/128] 2023-07-02 06:23:24,163 (trainer:732) INFO: 2epoch:train:3001-3100batch: iter_time=1.230e-04, forward_time=0.145, loss_ctc=98.565, loss_att=87.186, acc=0.519, loss=90.599, backward_time=1.106, grad_norm=160.472, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.121, optim0_lr0=1.763e-04, train_time=1.617 +[gpub001:0/128] 2023-07-02 06:25:58,596 (trainer:732) INFO: 2epoch:train:3101-3200batch: iter_time=9.116e-05, forward_time=0.146, loss_ctc=94.556, loss_att=97.305, acc=0.507, loss=96.480, backward_time=1.088, grad_norm=152.535, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.121, optim0_lr0=1.788e-04, train_time=1.544 +[gpub001:0/128] 2023-07-02 06:26:00,561 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub001:0/128] 2023-07-02 06:26:23,005 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 06:26:27,267 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.8", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 06:26:27,267 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.8, +[gpub001:0/128] 2023-07-02 06:26:27,271 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 06:33:58,814 (trainer:732) INFO: 2epoch:train:3201-3300batch: iter_time=1.523, forward_time=0.163, loss_ctc=105.148, loss_att=101.685, acc=0.514, loss=102.724, backward_time=1.128, grad_norm=146.660, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.122, optim0_lr0=1.813e-04, train_time=4.802 +[gpub001:0/128] 2023-07-02 06:36:43,188 (trainer:732) INFO: 2epoch:train:3301-3400batch: iter_time=8.705e-05, forward_time=0.148, loss_ctc=93.198, loss_att=91.092, acc=0.554, loss=91.724, backward_time=1.095, grad_norm=154.320, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.121, optim0_lr0=1.838e-04, train_time=1.644 +[gpub001:0/128] 2023-07-02 06:39:35,381 (trainer:732) INFO: 2epoch:train:3401-3500batch: iter_time=9.148e-05, forward_time=0.145, loss_ctc=93.879, loss_att=82.609, acc=0.528, loss=85.990, backward_time=1.097, grad_norm=142.568, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.121, optim0_lr0=1.863e-04, train_time=1.722 +[gpub001:0/128] 2023-07-02 06:42:04,168 (trainer:732) INFO: 2epoch:train:3501-3600batch: iter_time=8.728e-05, forward_time=0.146, loss_ctc=91.983, loss_att=91.456, acc=0.522, loss=91.614, backward_time=1.081, grad_norm=151.785, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.121, optim0_lr0=1.888e-04, train_time=1.488 +[gpub001:0/128] 2023-07-02 06:42:05,883 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub001:0/128] 2023-07-02 06:42:28,495 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 06:42:32,765 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.2", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 06:42:32,765 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.2, +[gpub001:0/128] 2023-07-02 06:42:32,769 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 06:48:18,823 (trainer:732) INFO: 2epoch:train:3601-3700batch: iter_time=1.599, forward_time=0.170, loss_ctc=105.844, loss_att=98.496, acc=0.521, loss=100.700, backward_time=1.106, grad_norm=150.808, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.121, optim0_lr0=1.913e-04, train_time=3.746 +[gpub001:0/128] 2023-07-02 06:50:50,784 (trainer:732) INFO: 2epoch:train:3701-3800batch: iter_time=9.644e-05, forward_time=0.146, loss_ctc=94.459, loss_att=88.622, acc=0.558, loss=90.373, backward_time=1.084, grad_norm=127.971, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.121, optim0_lr0=1.938e-04, train_time=1.520 +[gpub001:0/128] 2023-07-02 06:53:19,863 (trainer:732) INFO: 2epoch:train:3801-3900batch: iter_time=9.916e-05, forward_time=0.146, loss_ctc=90.685, loss_att=77.556, acc=0.547, loss=81.494, backward_time=1.082, grad_norm=162.079, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.121, optim0_lr0=1.963e-04, train_time=1.491 +[gpub001:0/128] 2023-07-02 06:55:49,487 (trainer:732) INFO: 2epoch:train:3901-4000batch: iter_time=9.854e-05, forward_time=0.145, loss_ctc=93.244, loss_att=90.302, acc=0.527, loss=91.184, backward_time=1.080, grad_norm=140.916, clip=100.000, loss_scale=5.243e+05, optim_step_time=0.121, optim0_lr0=1.988e-04, train_time=1.496 +[gpub001:0/128] 2023-07-02 07:05:45,651 (trainer:338) INFO: 2epoch results: [train] iter_time=0.448, forward_time=0.153, loss_ctc=106.978, loss_att=107.457, acc=0.475, loss=107.313, backward_time=1.099, grad_norm=162.472, clip=100.000, loss_scale=3.932e+05, optim_step_time=0.121, optim0_lr0=1.500e-04, train_time=2.323, time=2 hours, 35 minutes and 3.24 seconds, total_count=8000, gpu_max_cached_mem_GB=37.209, [valid] loss_ctc=97.384, cer_ctc=0.435, loss_att=89.451, acc=0.424, cer=0.559, wer=1.000, loss=91.831, time=3 minutes and 54.37 seconds, total_count=1012, gpu_max_cached_mem_GB=37.209, [att_plot] time=5 minutes and 50.55 seconds, total_count=0, gpu_max_cached_mem_GB=37.209 +[gpub001:0/128] 2023-07-02 07:06:05,363 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub001:0/128] 2023-07-02 07:06:05,432 (trainer:272) INFO: 3/100epoch started. Estimated time to finish: 1 week, 4 days and 2 hours +[gpub001:0/128] 2023-07-02 07:06:06,952 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
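"The best model has been updated: valid.acc, valid.total_count" records that this epoch's checkpoint beat the previous best on those tracked criteria (valid.acc rose from 0.275 after epoch 1 to 0.424 here). A sketch of that kind of best-criterion bookkeeping; the names are illustrative rather than ESPnet's internals:

    best_so_far: dict[str, float] = {}

    def improves(name: str, value: float,
                 higher_is_better: bool = True) -> bool:
        # True when `value` beats the stored best, in which case the
        # caller would also snapshot the current checkpoint.
        prev = best_so_far.get(name)
        better = prev is None or (
            value > prev if higher_is_better else value < prev)
        if better:
            best_so_far[name] = value
        return better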
+[gpub001:0/128] 2023-07-02 07:06:28,662 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 07:06:35,113 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.7", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 07:06:35,113 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.7, +[gpub001:0/128] 2023-07-02 07:06:35,236 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 07:14:50,669 (trainer:732) INFO: 3epoch:train:1-100batch: iter_time=3.597, forward_time=0.184, loss_ctc=99.267, loss_att=94.412, acc=0.525, loss=95.868, backward_time=1.110, grad_norm=176.962, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.123, optim0_lr0=2.013e-04, train_time=5.245 +[gpub001:0/128] 2023-07-02 07:17:25,258 (trainer:732) INFO: 3epoch:train:101-200batch: iter_time=9.299e-05, forward_time=0.157, loss_ctc=113.344, loss_att=105.667, acc=0.511, loss=107.970, backward_time=1.096, grad_norm=200.073, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.121, optim0_lr0=2.038e-04, train_time=1.546 +[gpub001:0/128] 2023-07-02 07:20:08,475 (trainer:732) INFO: 3epoch:train:201-300batch: iter_time=9.056e-05, forward_time=0.183, loss_ctc=101.734, loss_att=111.837, acc=0.507, loss=108.806, backward_time=1.108, grad_norm=163.340, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.123, optim0_lr0=2.063e-04, train_time=1.632 +[gpub001:0/128] 2023-07-02 07:22:51,493 (trainer:732) INFO: 3epoch:train:301-400batch: iter_time=8.434e-05, forward_time=0.235, loss_ctc=104.299, loss_att=105.729, acc=0.534, loss=105.300, backward_time=1.113, grad_norm=189.640, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.125, optim0_lr0=2.088e-04, train_time=1.630 +[gpub001:0/128] 2023-07-02 07:23:00,414 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub001:0/128] 2023-07-02 07:23:22,231 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 07:23:26,426 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.3", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 07:23:26,426 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.3, +[gpub001:0/128] 2023-07-02 07:23:26,430 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 07:29:39,256 (trainer:732) INFO: 3epoch:train:401-500batch: iter_time=1.897, forward_time=0.147, loss_ctc=97.563, loss_att=88.949, acc=0.535, loss=91.534, backward_time=1.113, grad_norm=154.738, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.121, optim0_lr0=2.113e-04, train_time=4.077 +[gpub001:0/128] 2023-07-02 07:32:09,205 (trainer:732) INFO: 3epoch:train:501-600batch: iter_time=9.931e-05, forward_time=0.146, loss_ctc=105.782, loss_att=97.985, acc=0.527, loss=100.324, backward_time=1.085, grad_norm=177.959, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.121, optim0_lr0=2.138e-04, train_time=1.499 +[gpub001:0/128] 2023-07-02 07:34:38,498 (trainer:732) INFO: 3epoch:train:601-700batch: iter_time=1.042e-04, forward_time=0.147, loss_ctc=99.455, loss_att=104.887, acc=0.524, loss=103.257, backward_time=1.082, grad_norm=128.974, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.121, optim0_lr0=2.163e-04, train_time=1.493 +[gpub001:0/128] 2023-07-02 07:37:07,052 (trainer:732) INFO: 3epoch:train:701-800batch: iter_time=9.803e-05, forward_time=0.146, loss_ctc=98.270, loss_att=100.056, acc=0.545, loss=99.520, backward_time=1.081, grad_norm=161.994, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.121, optim0_lr0=2.188e-04, train_time=1.485 +[gpub001:0/128] 2023-07-02 07:37:09,329 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub001:0/128] 2023-07-02 07:37:31,341 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 07:37:35,616 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.9", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 07:37:35,616 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.9, +[gpub001:0/128] 2023-07-02 07:37:35,620 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 07:45:33,041 (trainer:732) INFO: 3epoch:train:801-900batch: iter_time=1.497, forward_time=0.145, loss_ctc=93.991, loss_att=84.725, acc=0.550, loss=87.505, backward_time=1.122, grad_norm=141.107, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.121, optim0_lr0=2.213e-04, train_time=5.060 +[gpub001:0/128] 2023-07-02 07:48:06,099 (trainer:732) INFO: 3epoch:train:901-1000batch: iter_time=9.286e-05, forward_time=0.146, loss_ctc=104.802, loss_att=93.877, acc=0.536, loss=97.154, backward_time=1.088, grad_norm=185.883, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.121, optim0_lr0=2.238e-04, train_time=1.530 +[gpub001:0/128] 2023-07-02 07:50:42,566 (trainer:732) INFO: 3epoch:train:1001-1100batch: iter_time=9.304e-05, forward_time=0.145, loss_ctc=98.120, loss_att=102.667, acc=0.530, loss=101.303, backward_time=1.094, grad_norm=167.675, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.121, optim0_lr0=2.263e-04, train_time=1.564 +[gpub001:0/128] 2023-07-02 07:53:11,245 (trainer:732) INFO: 3epoch:train:1101-1200batch: iter_time=8.833e-05, forward_time=0.144, loss_ctc=100.707, loss_att=99.351, acc=0.551, loss=99.758, backward_time=1.081, grad_norm=199.649, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.121, optim0_lr0=2.288e-04, train_time=1.487 +[gpub001:0/128] 2023-07-02 07:53:12,995 (multiple_iter_factory:32) INFO: Building 3th iter-factory... 
+[gpub001:0/128] 2023-07-02 07:53:35,401 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 07:53:39,690 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.8", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 07:53:39,690 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.8, +[gpub001:0/128] 2023-07-02 07:53:39,694 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 07:59:30,939 (trainer:732) INFO: 3epoch:train:1201-1300batch: iter_time=1.559, forward_time=0.174, loss_ctc=94.186, loss_att=87.170, acc=0.539, loss=89.275, backward_time=1.106, grad_norm=328.801, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.121, optim0_lr0=2.313e-04, train_time=3.796 +[gpub001:0/128] 2023-07-02 08:02:05,406 (trainer:732) INFO: 3epoch:train:1301-1400batch: iter_time=1.010e-04, forward_time=0.146, loss_ctc=99.263, loss_att=93.429, acc=0.537, loss=95.179, backward_time=1.098, grad_norm=148.362, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.121, optim0_lr0=2.338e-04, train_time=1.545 +[gpub001:0/128] 2023-07-02 08:04:34,756 (trainer:732) INFO: 3epoch:train:1401-1500batch: iter_time=1.014e-04, forward_time=0.146, loss_ctc=94.767, loss_att=98.356, acc=0.536, loss=97.279, backward_time=1.083, grad_norm=130.626, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.121, optim0_lr0=2.363e-04, train_time=1.493 +[gpub001:0/128] 2023-07-02 08:07:05,149 (trainer:732) INFO: 3epoch:train:1501-1600batch: iter_time=1.019e-04, forward_time=0.145, loss_ctc=100.064, loss_att=98.786, acc=0.540, loss=99.170, backward_time=1.089, grad_norm=163.999, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.121, optim0_lr0=2.388e-04, train_time=1.504 +[gpub001:0/128] 2023-07-02 08:07:16,192 (multiple_iter_factory:32) INFO: Building 4th iter-factory... 
+[gpub001:0/128] 2023-07-02 08:07:38,844 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 08:07:43,426 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.4", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 08:07:43,426 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.4, +[gpub001:0/128] 2023-07-02 08:07:43,431 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 08:15:07,803 (trainer:732) INFO: 3epoch:train:1601-1700batch: iter_time=2.782, forward_time=0.171, loss_ctc=91.055, loss_att=81.218, acc=0.556, loss=84.169, backward_time=1.110, grad_norm=128.434, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.122, optim0_lr0=2.413e-04, train_time=4.826 +[gpub001:0/128] 2023-07-02 08:17:37,685 (trainer:732) INFO: 3epoch:train:1701-1800batch: iter_time=1.161e-04, forward_time=0.149, loss_ctc=98.061, loss_att=89.586, acc=0.548, loss=92.128, backward_time=1.082, grad_norm=146.044, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.121, optim0_lr0=2.438e-04, train_time=1.499 +[gpub001:0/128] 2023-07-02 08:20:09,607 (trainer:732) INFO: 3epoch:train:1801-1900batch: iter_time=1.026e-04, forward_time=0.146, loss_ctc=98.893, loss_att=99.335, acc=0.537, loss=99.202, backward_time=1.081, grad_norm=155.831, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.121, optim0_lr0=2.463e-04, train_time=1.519 +[gpub001:0/128] 2023-07-02 08:22:38,993 (trainer:732) INFO: 3epoch:train:1901-2000batch: iter_time=1.062e-04, forward_time=0.147, loss_ctc=97.226, loss_att=98.121, acc=0.542, loss=97.853, backward_time=1.080, grad_norm=188.439, clip=100.000, loss_scale=1.049e+06, optim_step_time=0.121, optim0_lr0=2.488e-04, train_time=1.494 +[gpub001:0/128] 2023-07-02 08:22:52,777 (multiple_iter_factory:32) INFO: Building 5th iter-factory... 
+[gpub001:0/128] 2023-07-02 08:23:14,999 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 08:23:19,324 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.5", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 08:23:19,324 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.5, +[gpub001:0/128] 2023-07-02 08:23:19,328 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 08:30:03,463 (trainer:732) INFO: 3epoch:train:2001-2100batch: iter_time=2.802, forward_time=0.146, loss_ctc=91.586, loss_att=82.817, acc=0.563, loss=85.448, backward_time=1.105, grad_norm=140.635, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.121, optim0_lr0=2.494e-04, train_time=4.444 +[gpub001:0/128] 2023-07-02 08:32:33,043 (trainer:732) INFO: 3epoch:train:2101-2200batch: iter_time=1.024e-04, forward_time=0.147, loss_ctc=98.382, loss_att=88.675, acc=0.561, loss=91.587, backward_time=1.083, grad_norm=161.427, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.121, optim0_lr0=2.481e-04, train_time=1.496 +[gpub001:0/128] 2023-07-02 08:35:02,072 (trainer:732) INFO: 3epoch:train:2201-2300batch: iter_time=1.103e-04, forward_time=0.146, loss_ctc=94.521, loss_att=95.214, acc=0.555, loss=95.006, backward_time=1.083, grad_norm=120.258, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.121, optim0_lr0=2.469e-04, train_time=1.490 +[gpub001:0/128] 2023-07-02 08:37:38,878 (trainer:732) INFO: 3epoch:train:2301-2400batch: iter_time=1.005e-04, forward_time=0.146, loss_ctc=95.535, loss_att=91.789, acc=0.581, loss=92.913, backward_time=1.089, grad_norm=130.439, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.121, optim0_lr0=2.457e-04, train_time=1.568 +[gpub001:0/128] 2023-07-02 08:37:40,534 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub001:0/128] 2023-07-02 08:38:03,105 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 08:38:07,445 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.0", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 08:38:07,445 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.0, +[gpub001:0/128] 2023-07-02 08:38:07,449 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 08:44:55,772 (trainer:732) INFO: 3epoch:train:2401-2500batch: iter_time=1.576, forward_time=0.176, loss_ctc=88.566, loss_att=78.855, acc=0.571, loss=81.769, backward_time=1.099, grad_norm=125.630, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.122, optim0_lr0=2.445e-04, train_time=4.368 +[gpub001:0/128] 2023-07-02 08:47:25,999 (trainer:732) INFO: 3epoch:train:2501-2600batch: iter_time=1.039e-04, forward_time=0.146, loss_ctc=98.401, loss_att=88.608, acc=0.556, loss=91.546, backward_time=1.085, grad_norm=149.550, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.121, optim0_lr0=2.434e-04, train_time=1.502 +[gpub001:0/128] 2023-07-02 08:49:54,806 (trainer:732) INFO: 3epoch:train:2601-2700batch: iter_time=9.939e-05, forward_time=0.146, loss_ctc=92.927, loss_att=93.438, acc=0.553, loss=93.285, backward_time=1.081, grad_norm=122.391, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.121, optim0_lr0=2.422e-04, train_time=1.488 +[gpub001:0/128] 2023-07-02 08:52:23,625 (trainer:732) INFO: 3epoch:train:2701-2800batch: iter_time=9.931e-05, forward_time=0.146, loss_ctc=92.326, loss_att=92.323, acc=0.561, loss=92.324, backward_time=1.081, grad_norm=125.602, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.121, optim0_lr0=2.411e-04, train_time=1.488 +[gpub001:0/128] 2023-07-02 08:52:26,455 (multiple_iter_factory:32) INFO: Building 7th iter-factory... 
+[gpub001:0/128] 2023-07-02 08:52:48,903 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 08:52:53,114 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.2", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 08:52:53,114 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.2, +[gpub001:0/128] 2023-07-02 08:52:53,118 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 08:59:50,440 (trainer:732) INFO: 3epoch:train:2801-2900batch: iter_time=1.553, forward_time=0.146, loss_ctc=93.235, loss_att=78.888, acc=0.571, loss=83.192, backward_time=1.117, grad_norm=128.780, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.121, optim0_lr0=2.400e-04, train_time=4.468 +[gpub001:0/128] 2023-07-02 09:02:28,341 (trainer:732) INFO: 3epoch:train:2901-3000batch: iter_time=1.081e-04, forward_time=0.147, loss_ctc=96.098, loss_att=83.970, acc=0.569, loss=87.609, backward_time=1.098, grad_norm=147.013, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.121, optim0_lr0=2.389e-04, train_time=1.579 +[gpub001:0/128] 2023-07-02 09:04:57,396 (trainer:732) INFO: 3epoch:train:3001-3100batch: iter_time=9.911e-05, forward_time=0.145, loss_ctc=91.842, loss_att=91.034, acc=0.560, loss=91.276, backward_time=1.081, grad_norm=132.321, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.121, optim0_lr0=2.378e-04, train_time=1.490 +[gpub001:0/128] 2023-07-02 09:07:27,650 (trainer:732) INFO: 3epoch:train:3101-3200batch: iter_time=9.178e-05, forward_time=0.145, loss_ctc=92.726, loss_att=90.434, acc=0.567, loss=91.122, backward_time=1.083, grad_norm=123.287, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.121, optim0_lr0=2.367e-04, train_time=1.502 +[gpub001:0/128] 2023-07-02 09:07:37,004 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub001:0/128] 2023-07-02 09:07:59,537 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 09:08:03,842 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.6", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 09:08:03,842 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.6, +[gpub001:0/128] 2023-07-02 09:08:03,846 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 09:15:19,332 (trainer:732) INFO: 3epoch:train:3201-3300batch: iter_time=1.643, forward_time=0.155, loss_ctc=86.892, loss_att=74.836, acc=0.582, loss=78.453, backward_time=1.104, grad_norm=119.753, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.121, optim0_lr0=2.357e-04, train_time=4.717 +[gpub001:0/128] 2023-07-02 09:17:49,867 (trainer:732) INFO: 3epoch:train:3301-3400batch: iter_time=9.086e-05, forward_time=0.147, loss_ctc=94.866, loss_att=80.655, acc=0.580, loss=84.918, backward_time=1.083, grad_norm=133.801, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.121, optim0_lr0=2.346e-04, train_time=1.505 +[gpub001:0/128] 2023-07-02 09:20:19,435 (trainer:732) INFO: 3epoch:train:3401-3500batch: iter_time=9.423e-05, forward_time=0.145, loss_ctc=91.040, loss_att=90.233, acc=0.562, loss=90.475, backward_time=1.080, grad_norm=122.931, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.121, optim0_lr0=2.336e-04, train_time=1.495 +[gpub001:0/128] 2023-07-02 09:22:47,882 (trainer:732) INFO: 3epoch:train:3501-3600batch: iter_time=9.105e-05, forward_time=0.145, loss_ctc=90.252, loss_att=86.651, acc=0.579, loss=87.731, backward_time=1.080, grad_norm=128.843, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.121, optim0_lr0=2.326e-04, train_time=1.484 +[gpub001:0/128] 2023-07-02 09:22:49,908 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub001:0/128] 2023-07-02 09:23:12,184 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 09:23:16,491 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.1", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 09:23:16,491 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.1, +[gpub001:0/128] 2023-07-02 09:23:16,495 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 09:29:14,252 (trainer:732) INFO: 3epoch:train:3601-3700batch: iter_time=2.007, forward_time=0.173, loss_ctc=86.671, loss_att=74.898, acc=0.593, loss=78.429, backward_time=1.111, grad_norm=109.421, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.122, optim0_lr0=2.316e-04, train_time=3.863 +[gpub001:0/128] 2023-07-02 09:31:55,295 (trainer:732) INFO: 3epoch:train:3701-3800batch: iter_time=1.082e-04, forward_time=0.146, loss_ctc=95.586, loss_att=81.639, acc=0.588, loss=85.823, backward_time=1.093, grad_norm=144.015, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.121, optim0_lr0=2.306e-04, train_time=1.610 +[gpub001:0/128] 2023-07-02 09:34:33,919 (trainer:732) INFO: 3epoch:train:3801-3900batch: iter_time=8.169e-05, forward_time=0.145, loss_ctc=88.969, loss_att=87.806, acc=0.582, loss=88.155, backward_time=1.090, grad_norm=123.158, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.121, optim0_lr0=2.296e-04, train_time=1.586 +[gpub001:0/128] 2023-07-02 09:37:16,204 (trainer:732) INFO: 3epoch:train:3901-4000batch: iter_time=7.505e-05, forward_time=0.146, loss_ctc=90.182, loss_att=85.377, acc=0.604, loss=86.819, backward_time=1.099, grad_norm=111.405, clip=100.000, loss_scale=2.097e+06, optim_step_time=0.121, optim0_lr0=2.287e-04, train_time=1.623 +[gpub001:0/128] 2023-07-02 09:46:42,576 (trainer:338) INFO: 3epoch results: [train] iter_time=0.523, forward_time=0.153, loss_ctc=96.036, loss_att=91.357, acc=0.554, loss=92.761, backward_time=1.093, grad_norm=150.980, clip=100.000, loss_scale=1.573e+06, optim_step_time=0.121, optim0_lr0=2.318e-04, train_time=2.267, time=2 hours, 31 minutes and 22.78 seconds, total_count=12000, gpu_max_cached_mem_GB=37.209, [valid] loss_ctc=94.999, cer_ctc=0.408, loss_att=80.741, acc=0.476, cer=0.524, wer=1.000, loss=85.019, time=3 minutes and 26.69 seconds, total_count=1518, gpu_max_cached_mem_GB=37.209, [att_plot] time=5 minutes and 47.46 seconds, total_count=0, gpu_max_cached_mem_GB=37.209 +[gpub001:0/128] 2023-07-02 09:46:57,926 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub001:0/128] 2023-07-02 09:46:57,928 (trainer:272) INFO: 4/100epoch started. Estimated time to finish: 1 week, 3 days and 22 hours +[gpub001:0/128] 2023-07-02 09:46:57,931 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub001:0/128] 2023-07-02 09:47:20,132 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 09:47:24,739 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.1", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 09:47:24,739 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.1, +[gpub001:0/128] 2023-07-02 09:47:24,743 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 09:52:16,542 (trainer:732) INFO: 4epoch:train:1-100batch: iter_time=1.603, forward_time=0.147, loss_ctc=98.611, loss_att=79.913, acc=0.566, loss=85.523, backward_time=1.107, grad_norm=126.977, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.121, optim0_lr0=2.277e-04, train_time=3.186 +[gpub001:0/128] 2023-07-02 09:54:48,383 (trainer:732) INFO: 4epoch:train:101-200batch: iter_time=9.905e-05, forward_time=0.146, loss_ctc=93.292, loss_att=81.088, acc=0.588, loss=84.749, backward_time=1.087, grad_norm=140.335, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.121, optim0_lr0=2.268e-04, train_time=1.518 +[gpub001:0/128] 2023-07-02 09:57:28,030 (trainer:732) INFO: 4epoch:train:201-300batch: iter_time=1.015e-04, forward_time=0.144, loss_ctc=87.246, loss_att=71.450, acc=0.583, loss=76.189, backward_time=1.089, grad_norm=114.444, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.121, optim0_lr0=2.259e-04, train_time=1.596 +[gpub001:0/128] 2023-07-02 10:00:28,551 (trainer:732) INFO: 4epoch:train:301-400batch: iter_time=1.028e-04, forward_time=0.265, loss_ctc=98.629, loss_att=87.747, acc=0.596, loss=91.012, backward_time=1.121, grad_norm=129.662, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.124, optim0_lr0=2.249e-04, train_time=1.805 +[gpub001:0/128] 2023-07-02 10:00:31,903 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub001:0/128] 2023-07-02 10:00:53,880 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 10:00:58,136 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.2", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 10:00:58,136 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.2, +[gpub001:0/128] 2023-07-02 10:00:58,163 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 10:07:30,385 (trainer:732) INFO: 4epoch:train:401-500batch: iter_time=1.924, forward_time=0.160, loss_ctc=93.934, loss_att=78.095, acc=0.565, loss=82.847, backward_time=1.116, grad_norm=124.073, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.122, optim0_lr0=2.240e-04, train_time=4.218 +[gpub001:0/128] 2023-07-02 10:10:00,005 (trainer:732) INFO: 4epoch:train:501-600batch: iter_time=9.793e-05, forward_time=0.145, loss_ctc=95.139, loss_att=82.950, acc=0.579, loss=86.606, backward_time=1.084, grad_norm=150.418, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.121, optim0_lr0=2.231e-04, train_time=1.496 +[gpub001:0/128] 2023-07-02 10:12:28,946 (trainer:732) INFO: 4epoch:train:601-700batch: iter_time=9.726e-05, forward_time=0.145, loss_ctc=81.329, loss_att=68.312, acc=0.587, loss=72.217, backward_time=1.082, grad_norm=104.574, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.121, optim0_lr0=2.223e-04, train_time=1.489 +[gpub001:0/128] 2023-07-02 10:14:57,804 (trainer:732) INFO: 4epoch:train:701-800batch: iter_time=9.243e-05, forward_time=0.145, loss_ctc=98.008, loss_att=90.442, acc=0.585, loss=92.712, backward_time=1.081, grad_norm=138.562, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.121, optim0_lr0=2.214e-04, train_time=1.488 +[gpub001:0/128] 2023-07-02 10:15:11,245 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub001:0/128] 2023-07-02 10:15:34,137 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 10:15:38,468 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.4", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 10:15:38,468 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.4, +[gpub001:0/128] 2023-07-02 10:15:38,472 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 10:20:48,872 (trainer:732) INFO: 4epoch:train:801-900batch: iter_time=1.797, forward_time=0.161, loss_ctc=94.104, loss_att=75.650, acc=0.575, loss=81.186, backward_time=1.104, grad_norm=142.424, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.121, optim0_lr0=2.205e-04, train_time=3.510 +[gpub001:0/128] 2023-07-02 10:23:20,613 (trainer:732) INFO: 4epoch:train:901-1000batch: iter_time=9.979e-05, forward_time=0.145, loss_ctc=91.794, loss_att=79.748, acc=0.589, loss=83.362, backward_time=1.095, grad_norm=112.707, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.121, optim0_lr0=2.197e-04, train_time=1.517 +[gpub001:0/128] 2023-07-02 10:25:49,266 (trainer:732) INFO: 4epoch:train:1001-1100batch: iter_time=1.043e-04, forward_time=0.145, loss_ctc=80.587, loss_att=68.929, acc=0.594, loss=72.427, backward_time=1.079, grad_norm=117.919, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.121, optim0_lr0=2.188e-04, train_time=1.486 +[gpub001:0/128] 2023-07-02 10:28:17,736 (trainer:732) INFO: 4epoch:train:1101-1200batch: iter_time=9.663e-05, forward_time=0.145, loss_ctc=96.761, loss_att=88.378, acc=0.589, loss=90.893, backward_time=1.081, grad_norm=118.353, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.121, optim0_lr0=2.180e-04, train_time=1.484 +[gpub001:0/128] 2023-07-02 10:28:19,612 (multiple_iter_factory:32) INFO: Building 3th iter-factory... 
+[gpub001:0/128] 2023-07-02 10:28:41,496 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 10:28:45,961 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.7", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 10:28:45,961 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.7, +[gpub001:0/128] 2023-07-02 10:28:45,965 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 10:34:43,152 (trainer:732) INFO: 4epoch:train:1201-1300batch: iter_time=1.606, forward_time=0.181, loss_ctc=91.554, loss_att=74.234, acc=0.591, loss=79.430, backward_time=1.106, grad_norm=113.931, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.121, optim0_lr0=2.172e-04, train_time=3.854 +[gpub001:0/128] 2023-07-02 10:37:13,480 (trainer:732) INFO: 4epoch:train:1301-1400batch: iter_time=7.940e-05, forward_time=0.146, loss_ctc=89.134, loss_att=77.040, acc=0.606, loss=80.668, backward_time=1.084, grad_norm=106.282, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.121, optim0_lr0=2.164e-04, train_time=1.503 +[gpub001:0/128] 2023-07-02 10:39:44,808 (trainer:732) INFO: 4epoch:train:1401-1500batch: iter_time=8.522e-05, forward_time=0.146, loss_ctc=78.916, loss_att=63.992, acc=0.613, loss=68.469, backward_time=1.088, grad_norm=96.651, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.121, optim0_lr0=2.156e-04, train_time=1.513 +[gpub001:0/128] 2023-07-02 10:42:13,272 (trainer:732) INFO: 4epoch:train:1501-1600batch: iter_time=7.945e-05, forward_time=0.145, loss_ctc=96.481, loss_att=83.388, acc=0.614, loss=87.316, backward_time=1.081, grad_norm=147.415, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.121, optim0_lr0=2.148e-04, train_time=1.484 +[gpub001:0/128] 2023-07-02 10:42:23,348 (multiple_iter_factory:32) INFO: Building 4th iter-factory... 
+[gpub001:0/128] 2023-07-02 10:42:45,873 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 10:42:50,180 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.6", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 10:42:50,180 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.6, +[gpub001:0/128] 2023-07-02 10:42:50,184 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 10:49:13,034 (trainer:732) INFO: 4epoch:train:1601-1700batch: iter_time=1.974, forward_time=0.173, loss_ctc=88.077, loss_att=72.109, acc=0.591, loss=76.900, backward_time=1.098, grad_norm=115.526, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.121, optim0_lr0=2.140e-04, train_time=4.197 +[gpub001:0/128] 2023-07-02 10:51:42,877 (trainer:732) INFO: 4epoch:train:1701-1800batch: iter_time=7.686e-05, forward_time=0.145, loss_ctc=88.207, loss_att=74.904, acc=0.604, loss=78.895, backward_time=1.083, grad_norm=104.521, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.121, optim0_lr0=2.132e-04, train_time=1.498 +[gpub001:0/128] 2023-07-02 10:54:14,248 (trainer:732) INFO: 4epoch:train:1801-1900batch: iter_time=7.721e-05, forward_time=0.145, loss_ctc=77.966, loss_att=64.135, acc=0.608, loss=68.284, backward_time=1.084, grad_norm=108.621, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.121, optim0_lr0=2.124e-04, train_time=1.513 +[gpub001:0/128] 2023-07-02 10:56:42,821 (trainer:732) INFO: 4epoch:train:1901-2000batch: iter_time=7.336e-05, forward_time=0.144, loss_ctc=93.915, loss_att=85.705, acc=0.603, loss=88.168, backward_time=1.082, grad_norm=116.908, clip=100.000, loss_scale=4.194e+06, optim_step_time=0.121, optim0_lr0=2.117e-04, train_time=1.486 +[gpub001:0/128] 2023-07-02 10:56:44,596 (multiple_iter_factory:32) INFO: Building 5th iter-factory... 
+[gpub001:0/128] 2023-07-02 10:57:06,859 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 10:57:11,122 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.0", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 10:57:11,123 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.0, +[gpub001:0/128] 2023-07-02 10:57:11,126 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 11:04:53,694 (trainer:732) INFO: 4epoch:train:2001-2100batch: iter_time=1.699, forward_time=0.148, loss_ctc=88.188, loss_att=72.107, acc=0.591, loss=76.931, backward_time=1.102, grad_norm=116.420, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.121, optim0_lr0=2.109e-04, train_time=4.909 +[gpub001:0/128] 2023-07-02 11:07:24,536 (trainer:732) INFO: 4epoch:train:2101-2200batch: iter_time=1.028e-04, forward_time=0.148, loss_ctc=87.151, loss_att=74.177, acc=0.610, loss=78.069, backward_time=1.083, grad_norm=105.729, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.121, optim0_lr0=2.102e-04, train_time=1.508 +[gpub001:0/128] 2023-07-02 11:09:53,133 (trainer:732) INFO: 4epoch:train:2201-2300batch: iter_time=9.744e-05, forward_time=0.147, loss_ctc=79.577, loss_att=65.200, acc=0.609, loss=69.513, backward_time=1.080, grad_norm=100.328, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.121, optim0_lr0=2.094e-04, train_time=1.486 +[gpub001:0/128] 2023-07-02 11:12:23,324 (trainer:732) INFO: 4epoch:train:2301-2400batch: iter_time=9.173e-05, forward_time=0.147, loss_ctc=92.968, loss_att=84.240, acc=0.606, loss=86.858, backward_time=1.081, grad_norm=109.427, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.121, optim0_lr0=2.087e-04, train_time=1.502 +[gpub001:0/128] 2023-07-02 11:12:25,037 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub001:0/128] 2023-07-02 11:12:47,470 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 11:12:51,797 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.8", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 11:12:51,797 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.8, +[gpub001:0/128] 2023-07-02 11:12:51,801 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 11:19:12,113 (trainer:732) INFO: 4epoch:train:2401-2500batch: iter_time=1.546, forward_time=0.177, loss_ctc=87.759, loss_att=70.861, acc=0.599, loss=75.930, backward_time=1.115, grad_norm=136.550, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.122, optim0_lr0=2.080e-04, train_time=4.087 +[gpub001:0/128] 2023-07-02 11:21:41,041 (trainer:732) INFO: 4epoch:train:2501-2600batch: iter_time=7.723e-05, forward_time=0.145, loss_ctc=86.882, loss_att=73.524, acc=0.613, loss=77.532, backward_time=1.082, grad_norm=106.358, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.121, optim0_lr0=2.072e-04, train_time=1.489 +[gpub001:0/128] 2023-07-02 11:24:18,226 (trainer:732) INFO: 4epoch:train:2601-2700batch: iter_time=8.031e-05, forward_time=0.144, loss_ctc=78.351, loss_att=64.279, acc=0.611, loss=68.501, backward_time=1.102, grad_norm=109.443, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.121, optim0_lr0=2.065e-04, train_time=1.572 +[gpub001:0/128] 2023-07-02 11:27:00,239 (trainer:732) INFO: 4epoch:train:2701-2800batch: iter_time=8.117e-05, forward_time=0.144, loss_ctc=93.410, loss_att=83.593, acc=0.610, loss=86.538, backward_time=1.102, grad_norm=114.063, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.121, optim0_lr0=2.058e-04, train_time=1.620 +[gpub001:0/128] 2023-07-02 11:27:05,797 (multiple_iter_factory:32) INFO: Building 7th iter-factory... 
+[gpub001:0/128] 2023-07-02 11:27:27,858 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 11:27:32,128 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.5", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 11:27:32,128 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.5, +[gpub001:0/128] 2023-07-02 11:27:32,132 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 11:34:05,899 (trainer:732) INFO: 4epoch:train:2801-2900batch: iter_time=2.044, forward_time=0.173, loss_ctc=87.698, loss_att=72.670, acc=0.606, loss=77.179, backward_time=1.104, grad_norm=115.150, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.121, optim0_lr0=2.051e-04, train_time=4.256 +[gpub001:0/128] 2023-07-02 11:36:36,429 (trainer:732) INFO: 4epoch:train:2901-3000batch: iter_time=9.052e-05, forward_time=0.147, loss_ctc=86.042, loss_att=73.284, acc=0.624, loss=77.111, backward_time=1.087, grad_norm=107.460, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.121, optim0_lr0=2.045e-04, train_time=1.505 +[gpub001:0/128] 2023-07-02 11:39:05,178 (trainer:732) INFO: 4epoch:train:3001-3100batch: iter_time=7.764e-05, forward_time=0.145, loss_ctc=76.124, loss_att=61.174, acc=0.628, loss=65.659, backward_time=1.081, grad_norm=99.575, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.121, optim0_lr0=2.038e-04, train_time=1.487 +[gpub001:0/128] 2023-07-02 11:41:33,800 (trainer:732) INFO: 4epoch:train:3101-3200batch: iter_time=7.370e-05, forward_time=0.144, loss_ctc=90.784, loss_att=79.546, acc=0.630, loss=82.917, backward_time=1.082, grad_norm=105.692, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.121, optim0_lr0=2.031e-04, train_time=1.486 +[gpub001:0/128] 2023-07-02 11:41:41,143 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub001:0/128] 2023-07-02 11:42:03,545 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 11:42:07,889 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.3", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 11:42:07,889 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.3, +[gpub001:0/128] 2023-07-02 11:42:07,893 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 11:48:55,570 (trainer:732) INFO: 4epoch:train:3201-3300batch: iter_time=2.460, forward_time=0.170, loss_ctc=86.826, loss_att=70.191, acc=0.612, loss=75.182, backward_time=1.105, grad_norm=122.938, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.121, optim0_lr0=2.024e-04, train_time=4.417 +[gpub001:0/128] 2023-07-02 11:51:25,881 (trainer:732) INFO: 4epoch:train:3301-3400batch: iter_time=9.428e-05, forward_time=0.147, loss_ctc=85.421, loss_att=72.776, acc=0.625, loss=76.569, backward_time=1.086, grad_norm=116.810, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.121, optim0_lr0=2.018e-04, train_time=1.503 +[gpub001:0/128] 2023-07-02 11:53:54,354 (trainer:732) INFO: 4epoch:train:3401-3500batch: iter_time=8.940e-05, forward_time=0.146, loss_ctc=76.243, loss_att=61.089, acc=0.628, loss=65.635, backward_time=1.079, grad_norm=102.428, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.121, optim0_lr0=2.011e-04, train_time=1.485 +[gpub001:0/128] 2023-07-02 11:56:51,221 (trainer:732) INFO: 4epoch:train:3501-3600batch: iter_time=8.309e-05, forward_time=0.146, loss_ctc=90.417, loss_att=79.209, acc=0.635, loss=82.572, backward_time=1.115, grad_norm=107.842, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.121, optim0_lr0=2.005e-04, train_time=1.768 +[gpub001:0/128] 2023-07-02 11:56:54,712 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub001:0/128] 2023-07-02 11:57:16,883 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 11:57:21,231 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.9", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 11:57:21,231 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.9, +[gpub001:0/128] 2023-07-02 11:57:21,235 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 12:04:28,387 (trainer:732) INFO: 4epoch:train:3601-3700batch: iter_time=1.574, forward_time=0.203, loss_ctc=87.050, loss_att=69.244, acc=0.619, loss=74.586, backward_time=1.115, grad_norm=102.912, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.123, optim0_lr0=1.998e-04, train_time=4.571 +[gpub001:0/128] 2023-07-02 12:06:58,225 (trainer:732) INFO: 4epoch:train:3701-3800batch: iter_time=9.426e-05, forward_time=0.147, loss_ctc=85.563, loss_att=72.811, acc=0.627, loss=76.637, backward_time=1.083, grad_norm=108.872, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.121, optim0_lr0=1.992e-04, train_time=1.499 +[gpub001:0/128] 2023-07-02 12:09:29,231 (trainer:732) INFO: 4epoch:train:3801-3900batch: iter_time=9.519e-05, forward_time=0.146, loss_ctc=76.919, loss_att=60.499, acc=0.633, loss=65.425, backward_time=1.079, grad_norm=100.011, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.121, optim0_lr0=1.986e-04, train_time=1.510 +[gpub001:0/128] 2023-07-02 12:11:59,799 (trainer:732) INFO: 4epoch:train:3901-4000batch: iter_time=8.595e-05, forward_time=0.146, loss_ctc=90.077, loss_att=77.617, acc=0.639, loss=81.355, backward_time=1.084, grad_norm=116.280, clip=100.000, loss_scale=8.389e+06, optim_step_time=0.121, optim0_lr0=1.979e-04, train_time=1.505 +[gpub001:0/128] 2023-07-02 12:21:57,381 (trainer:338) INFO: 4epoch results: [train] iter_time=0.456, forward_time=0.155, loss_ctc=88.178, loss_att=74.657, acc=0.605, loss=78.714, backward_time=1.092, grad_norm=115.865, clip=100.000, loss_scale=6.291e+06, optim_step_time=0.121, optim0_lr0=2.118e-04, train_time=2.175, time=2 hours, 25 minutes and 14.26 seconds, total_count=16000, gpu_max_cached_mem_GB=37.211, [valid] loss_ctc=85.064, cer_ctc=0.401, loss_att=69.007, acc=0.517, cer=0.516, wer=1.000, loss=73.824, time=3 minutes and 47.38 seconds, total_count=2024, gpu_max_cached_mem_GB=37.211, [att_plot] time=5 minutes and 57.81 seconds, total_count=0, gpu_max_cached_mem_GB=37.211 +[gpub001:0/128] 2023-07-02 12:22:12,814 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub001:0/128] 2023-07-02 12:22:12,816 (trainer:272) INFO: 5/100epoch started. Estimated time to finish: 1 week, 3 days and 16 hours +[gpub001:0/128] 2023-07-02 12:22:12,819 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub001:0/128] 2023-07-02 12:22:35,284 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 12:22:41,131 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.9", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 12:22:41,131 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.9, +[gpub001:0/128] 2023-07-02 12:22:42,017 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 12:31:54,897 (trainer:732) INFO: 5epoch:train:1-100batch: iter_time=4.188, forward_time=0.167, loss_ctc=95.831, loss_att=83.013, acc=0.566, loss=86.858, backward_time=1.104, grad_norm=128.232, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.121, optim0_lr0=1.973e-04, train_time=5.821 +[gpub001:0/128] 2023-07-02 12:34:31,184 (trainer:732) INFO: 5epoch:train:101-200batch: iter_time=7.943e-05, forward_time=0.145, loss_ctc=86.928, loss_att=65.908, acc=0.615, loss=72.214, backward_time=1.097, grad_norm=107.199, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.121, optim0_lr0=1.967e-04, train_time=1.563 +[gpub001:0/128] 2023-07-02 12:37:14,909 (trainer:732) INFO: 5epoch:train:201-300batch: iter_time=8.068e-05, forward_time=0.145, loss_ctc=82.643, loss_att=66.057, acc=0.622, loss=71.033, backward_time=1.123, grad_norm=117.355, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.121, optim0_lr0=1.961e-04, train_time=1.637 +[gpub001:0/128] 2023-07-02 12:39:43,827 (trainer:732) INFO: 5epoch:train:301-400batch: iter_time=8.139e-05, forward_time=0.146, loss_ctc=95.498, loss_att=80.022, acc=0.595, loss=84.665, backward_time=1.081, grad_norm=144.339, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.121, optim0_lr0=1.955e-04, train_time=1.489 +[gpub001:0/128] 2023-07-02 12:39:51,380 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub001:0/128] 2023-07-02 12:40:12,992 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 12:40:17,133 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.1", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 12:40:17,133 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.1, +[gpub001:0/128] 2023-07-02 12:40:17,137 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 12:48:12,918 (trainer:732) INFO: 5epoch:train:401-500batch: iter_time=1.579, forward_time=0.147, loss_ctc=88.461, loss_att=76.736, acc=0.586, loss=80.254, backward_time=1.098, grad_norm=113.460, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.121, optim0_lr0=1.949e-04, train_time=5.091 +[gpub001:0/128] 2023-07-02 12:50:45,684 (trainer:732) INFO: 5epoch:train:501-600batch: iter_time=1.028e-04, forward_time=0.147, loss_ctc=85.023, loss_att=63.231, acc=0.628, loss=69.769, backward_time=1.084, grad_norm=107.817, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.121, optim0_lr0=1.943e-04, train_time=1.527 +[gpub001:0/128] 2023-07-02 12:53:13,961 (trainer:732) INFO: 5epoch:train:601-700batch: iter_time=1.013e-04, forward_time=0.144, loss_ctc=80.852, loss_att=65.135, acc=0.627, loss=69.850, backward_time=1.076, grad_norm=121.490, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.121, optim0_lr0=1.937e-04, train_time=1.483 +[gpub001:0/128] 2023-07-02 12:55:42,736 (trainer:732) INFO: 5epoch:train:701-800batch: iter_time=9.932e-05, forward_time=0.145, loss_ctc=94.110, loss_att=78.547, acc=0.604, loss=83.216, backward_time=1.079, grad_norm=120.640, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.121, optim0_lr0=1.932e-04, train_time=1.488 +[gpub001:0/128] 2023-07-02 12:55:44,316 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub001:0/128] 2023-07-02 12:56:07,456 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 12:56:11,744 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.0", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 12:56:11,744 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.0, +[gpub001:0/128] 2023-07-02 12:56:11,748 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 13:02:22,172 (trainer:732) INFO: 5epoch:train:801-900batch: iter_time=1.509, forward_time=0.148, loss_ctc=88.112, loss_att=77.763, acc=0.577, loss=80.868, backward_time=1.137, grad_norm=106.985, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.121, optim0_lr0=1.926e-04, train_time=3.994 +[gpub001:0/128] 2023-07-02 13:04:50,899 (trainer:732) INFO: 5epoch:train:901-1000batch: iter_time=1.198e-04, forward_time=0.147, loss_ctc=83.503, loss_att=64.856, acc=0.623, loss=70.450, backward_time=1.080, grad_norm=105.383, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.121, optim0_lr0=1.920e-04, train_time=1.487 +[gpub001:0/128] 2023-07-02 13:07:19,780 (trainer:732) INFO: 5epoch:train:1001-1100batch: iter_time=1.212e-04, forward_time=0.148, loss_ctc=78.804, loss_att=62.053, acc=0.633, loss=67.078, backward_time=1.083, grad_norm=99.605, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.121, optim0_lr0=1.915e-04, train_time=1.489 +[gpub001:0/128] 2023-07-02 13:09:48,658 (trainer:732) INFO: 5epoch:train:1101-1200batch: iter_time=1.177e-04, forward_time=0.147, loss_ctc=91.446, loss_att=76.278, acc=0.600, loss=80.828, backward_time=1.080, grad_norm=117.116, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.121, optim0_lr0=1.909e-04, train_time=1.489 +[gpub001:0/128] 2023-07-02 13:09:50,669 (multiple_iter_factory:32) INFO: Building 3th iter-factory... 
+[gpub001:0/128] 2023-07-02 13:10:12,649 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 13:10:17,106 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.4", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 13:10:17,106 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.4, +[gpub001:0/128] 2023-07-02 13:10:17,110 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 13:16:06,569 (trainer:732) INFO: 5epoch:train:1201-1300batch: iter_time=1.607, forward_time=0.175, loss_ctc=85.120, loss_att=72.798, acc=0.593, loss=76.495, backward_time=1.101, grad_norm=103.506, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.122, optim0_lr0=1.903e-04, train_time=3.779 +[gpub001:0/128] 2023-07-02 13:18:36,586 (trainer:732) INFO: 5epoch:train:1301-1400batch: iter_time=1.060e-04, forward_time=0.145, loss_ctc=84.666, loss_att=64.196, acc=0.625, loss=70.337, backward_time=1.081, grad_norm=125.999, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.121, optim0_lr0=1.898e-04, train_time=1.500 +[gpub001:0/128] 2023-07-02 13:21:05,506 (trainer:732) INFO: 5epoch:train:1401-1500batch: iter_time=1.041e-04, forward_time=0.148, loss_ctc=76.989, loss_att=60.444, acc=0.639, loss=65.407, backward_time=1.082, grad_norm=93.583, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.121, optim0_lr0=1.892e-04, train_time=1.489 +[gpub001:0/128] 2023-07-02 13:23:34,157 (trainer:732) INFO: 5epoch:train:1501-1600batch: iter_time=8.877e-05, forward_time=0.147, loss_ctc=91.522, loss_att=76.267, acc=0.603, loss=80.844, backward_time=1.081, grad_norm=103.188, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.121, optim0_lr0=1.887e-04, train_time=1.486 +[gpub001:0/128] 2023-07-02 13:23:54,184 (multiple_iter_factory:32) INFO: Building 4th iter-factory... 
+[gpub001:0/128] 2023-07-02 13:24:16,462 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 13:24:20,736 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.5", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 13:24:20,736 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.5, +[gpub001:0/128] 2023-07-02 13:24:20,752 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 13:30:23,399 (trainer:732) INFO: 5epoch:train:1601-1700batch: iter_time=2.283, forward_time=0.146, loss_ctc=86.174, loss_att=73.323, acc=0.604, loss=77.178, backward_time=1.124, grad_norm=110.109, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.121, optim0_lr0=1.882e-04, train_time=4.092 +[gpub001:0/128] 2023-07-02 13:32:53,039 (trainer:732) INFO: 5epoch:train:1701-1800batch: iter_time=1.105e-04, forward_time=0.146, loss_ctc=81.258, loss_att=60.193, acc=0.643, loss=66.513, backward_time=1.081, grad_norm=124.281, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.121, optim0_lr0=1.876e-04, train_time=1.496 +[gpub001:0/128] 2023-07-02 13:35:25,312 (trainer:732) INFO: 5epoch:train:1801-1900batch: iter_time=1.034e-04, forward_time=0.147, loss_ctc=76.781, loss_att=61.004, acc=0.646, loss=65.737, backward_time=1.083, grad_norm=106.544, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.121, optim0_lr0=1.871e-04, train_time=1.523 +[gpub001:0/128] 2023-07-02 13:38:08,039 (trainer:732) INFO: 5epoch:train:1901-2000batch: iter_time=1.047e-04, forward_time=0.147, loss_ctc=89.325, loss_att=74.875, acc=0.619, loss=79.210, backward_time=1.121, grad_norm=109.861, clip=100.000, loss_scale=1.678e+07, optim_step_time=0.121, optim0_lr0=1.866e-04, train_time=1.627 +[gpub001:0/128] 2023-07-02 13:38:12,719 (multiple_iter_factory:32) INFO: Building 5th iter-factory... 
+[gpub001:0/128] 2023-07-02 13:38:34,773 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 13:38:39,038 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.7", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 13:38:39,038 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.7, +[gpub001:0/128] 2023-07-02 13:38:39,042 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 13:45:41,717 (trainer:732) INFO: 5epoch:train:2001-2100batch: iter_time=1.533, forward_time=0.146, loss_ctc=86.087, loss_att=72.796, acc=0.601, loss=76.784, backward_time=1.099, grad_norm=114.369, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.121, optim0_lr0=1.861e-04, train_time=4.537 +[gpub001:0/128] 2023-07-02 13:48:14,220 (trainer:732) INFO: 5epoch:train:2101-2200batch: iter_time=1.082e-04, forward_time=0.145, loss_ctc=81.831, loss_att=60.613, acc=0.644, loss=66.978, backward_time=1.088, grad_norm=131.370, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.121, optim0_lr0=1.856e-04, train_time=1.525 +[gpub001:0/128] 2023-07-02 13:50:51,032 (trainer:732) INFO: 5epoch:train:2201-2300batch: iter_time=1.024e-04, forward_time=0.146, loss_ctc=77.795, loss_att=60.560, acc=0.646, loss=65.730, backward_time=1.089, grad_norm=113.810, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.121, optim0_lr0=1.851e-04, train_time=1.568 +[gpub001:0/128] 2023-07-02 13:53:32,141 (trainer:732) INFO: 5epoch:train:2301-2400batch: iter_time=1.003e-04, forward_time=0.147, loss_ctc=89.325, loss_att=72.509, acc=0.624, loss=77.553, backward_time=1.096, grad_norm=104.473, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.121, optim0_lr0=1.845e-04, train_time=1.611 +[gpub001:0/128] 2023-07-02 13:53:33,805 (multiple_iter_factory:32) INFO: Building 6th iter-factory... 
+[gpub001:0/128] 2023-07-02 13:53:56,495 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 13:54:00,777 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.8", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 13:54:00,778 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.8, +[gpub001:0/128] 2023-07-02 13:54:00,781 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 14:00:22,334 (trainer:732) INFO: 5epoch:train:2401-2500batch: iter_time=1.553, forward_time=0.186, loss_ctc=83.017, loss_att=72.643, acc=0.601, loss=75.755, backward_time=1.101, grad_norm=101.899, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.122, optim0_lr0=1.840e-04, train_time=4.102 +[gpub001:0/128] 2023-07-02 14:02:53,893 (trainer:732) INFO: 5epoch:train:2501-2600batch: iter_time=1.039e-04, forward_time=0.143, loss_ctc=82.586, loss_att=62.473, acc=0.636, loss=68.507, backward_time=1.082, grad_norm=109.136, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.121, optim0_lr0=1.835e-04, train_time=1.515 +[gpub001:0/128] 2023-07-02 14:05:22,448 (trainer:732) INFO: 5epoch:train:2601-2700batch: iter_time=1.084e-04, forward_time=0.144, loss_ctc=78.078, loss_att=60.349, acc=0.643, loss=65.668, backward_time=1.077, grad_norm=104.700, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.121, optim0_lr0=1.831e-04, train_time=1.485 +[gpub001:0/128] 2023-07-02 14:08:06,353 (trainer:732) INFO: 5epoch:train:2701-2800batch: iter_time=1.017e-04, forward_time=0.145, loss_ctc=88.355, loss_att=73.214, acc=0.612, loss=77.756, backward_time=1.092, grad_norm=114.943, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.121, optim0_lr0=1.826e-04, train_time=1.639 +[gpub001:0/128] 2023-07-02 14:08:14,829 (multiple_iter_factory:32) INFO: Building 7th iter-factory... 
+[gpub001:0/128] 2023-07-02 14:08:36,903 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 14:08:41,179 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.3", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 14:08:41,179 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.3, +[gpub001:0/128] 2023-07-02 14:08:41,183 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 14:16:05,822 (trainer:732) INFO: 5epoch:train:2801-2900batch: iter_time=1.639, forward_time=0.145, loss_ctc=84.011, loss_att=71.273, acc=0.614, loss=75.095, backward_time=1.103, grad_norm=125.591, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.121, optim0_lr0=1.821e-04, train_time=4.794 +[gpub001:0/128] 2023-07-02 14:18:39,318 (trainer:732) INFO: 5epoch:train:2901-3000batch: iter_time=2.634e-04, forward_time=0.168, loss_ctc=81.248, loss_att=59.851, acc=0.647, loss=66.270, backward_time=1.091, grad_norm=91.920, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.124, optim0_lr0=1.816e-04, train_time=1.535 +[gpub001:0/128] 2023-07-02 14:21:11,194 (trainer:732) INFO: 5epoch:train:3001-3100batch: iter_time=1.143e-04, forward_time=0.162, loss_ctc=75.675, loss_att=60.444, acc=0.652, loss=65.013, backward_time=1.084, grad_norm=93.222, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.121, optim0_lr0=1.811e-04, train_time=1.518 +[gpub001:0/128] 2023-07-02 14:23:48,791 (trainer:732) INFO: 5epoch:train:3101-3200batch: iter_time=1.141e-04, forward_time=0.164, loss_ctc=90.248, loss_att=72.932, acc=0.623, loss=78.127, backward_time=1.095, grad_norm=113.248, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.122, optim0_lr0=1.807e-04, train_time=1.576 +[gpub001:0/128] 2023-07-02 14:23:59,201 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub001:0/128] 2023-07-02 14:24:21,746 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 14:24:26,057 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.6", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 14:24:26,057 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.6, +[gpub001:0/128] 2023-07-02 14:24:26,061 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 14:31:50,659 (trainer:732) INFO: 5epoch:train:3201-3300batch: iter_time=1.865, forward_time=0.167, loss_ctc=83.228, loss_att=69.985, acc=0.610, loss=73.958, backward_time=1.102, grad_norm=113.113, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.122, optim0_lr0=1.802e-04, train_time=4.818 +[gpub001:0/128] 2023-07-02 14:34:19,355 (trainer:732) INFO: 5epoch:train:3301-3400batch: iter_time=9.341e-05, forward_time=0.144, loss_ctc=80.645, loss_att=61.484, acc=0.643, loss=67.232, backward_time=1.079, grad_norm=94.831, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.121, optim0_lr0=1.797e-04, train_time=1.487 +[gpub001:0/128] 2023-07-02 14:36:52,892 (trainer:732) INFO: 5epoch:train:3401-3500batch: iter_time=9.983e-05, forward_time=0.145, loss_ctc=74.661, loss_att=58.553, acc=0.648, loss=63.385, backward_time=1.091, grad_norm=112.096, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.121, optim0_lr0=1.793e-04, train_time=1.535 +[gpub001:0/128] 2023-07-02 14:39:27,050 (trainer:732) INFO: 5epoch:train:3501-3600batch: iter_time=8.702e-05, forward_time=0.147, loss_ctc=87.700, loss_att=72.286, acc=0.618, loss=76.910, backward_time=1.091, grad_norm=108.539, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.121, optim0_lr0=1.788e-04, train_time=1.541 +[gpub001:0/128] 2023-07-02 14:39:34,517 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub001:0/128] 2023-07-02 14:39:57,079 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 14:40:01,384 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.2", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 14:40:01,385 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.2, +[gpub001:0/128] 2023-07-02 14:40:01,388 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 14:47:22,407 (trainer:732) INFO: 5epoch:train:3601-3700batch: iter_time=2.338, forward_time=0.146, loss_ctc=82.381, loss_att=70.294, acc=0.609, loss=73.920, backward_time=1.111, grad_norm=95.828, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.121, optim0_lr0=1.783e-04, train_time=4.753 +[gpub001:0/128] 2023-07-02 14:49:58,572 (trainer:732) INFO: 5epoch:train:3701-3800batch: iter_time=1.275e-04, forward_time=0.147, loss_ctc=78.720, loss_att=59.946, acc=0.648, loss=65.578, backward_time=1.090, grad_norm=91.671, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.121, optim0_lr0=1.779e-04, train_time=1.561 +[gpub001:0/128] 2023-07-02 14:52:32,393 (trainer:732) INFO: 5epoch:train:3801-3900batch: iter_time=1.171e-04, forward_time=0.144, loss_ctc=76.291, loss_att=59.368, acc=0.648, loss=64.445, backward_time=1.080, grad_norm=102.358, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.121, optim0_lr0=1.774e-04, train_time=1.538 +[gpub001:0/128] 2023-07-02 14:55:03,373 (trainer:732) INFO: 5epoch:train:3901-4000batch: iter_time=1.187e-04, forward_time=0.146, loss_ctc=86.476, loss_att=71.726, acc=0.622, loss=76.151, backward_time=1.081, grad_norm=105.900, clip=100.000, loss_scale=3.355e+07, optim_step_time=0.121, optim0_lr0=1.770e-04, train_time=1.510 +[gpub001:0/128] 2023-07-02 15:04:31,604 (trainer:338) INFO: 5epoch results: [train] iter_time=0.502, forward_time=0.150, loss_ctc=84.285, loss_att=68.150, acc=0.621, loss=72.991, backward_time=1.092, grad_norm=110.243, clip=100.000, loss_scale=2.517e+07, optim_step_time=0.121, optim0_lr0=1.866e-04, train_time=2.292, time=2 hours, 33 minutes and 0.88 seconds, total_count=20000, gpu_max_cached_mem_GB=37.211, [valid] loss_ctc=72.987, cer_ctc=0.380, loss_att=58.253, acc=0.554, cer=0.483, wer=0.990, loss=62.673, time=3 minutes and 29.52 seconds, total_count=2530, gpu_max_cached_mem_GB=37.211, [att_plot] time=5 minutes and 48.38 seconds, total_count=0, gpu_max_cached_mem_GB=37.211 +[gpub001:0/128] 2023-07-02 15:04:47,134 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub001:0/128] 2023-07-02 15:04:47,137 (trainer:272) INFO: 6/100epoch started. Estimated time to finish: 1 week, 3 days and 14 hours +[gpub001:0/128] 2023-07-02 15:04:47,140 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub001:0/128] 2023-07-02 15:05:08,659 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 15:05:12,886 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.9", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 15:05:12,886 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.9, +[gpub001:0/128] 2023-07-02 15:05:12,890 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 15:09:55,173 (trainer:732) INFO: 6epoch:train:1-100batch: iter_time=1.516, forward_time=0.173, loss_ctc=92.368, loss_att=76.121, acc=0.626, loss=80.995, backward_time=1.103, grad_norm=126.531, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.121, optim0_lr0=1.765e-04, train_time=3.080 +[gpub001:0/128] 2023-07-02 15:12:32,565 (trainer:732) INFO: 6epoch:train:101-200batch: iter_time=9.775e-05, forward_time=0.144, loss_ctc=83.143, loss_att=68.670, acc=0.613, loss=73.012, backward_time=1.090, grad_norm=97.187, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.121, optim0_lr0=1.761e-04, train_time=1.574 +[gpub001:0/128] 2023-07-02 15:15:24,315 (trainer:732) INFO: 6epoch:train:201-300batch: iter_time=1.016e-04, forward_time=0.145, loss_ctc=84.306, loss_att=66.722, acc=0.648, loss=71.997, backward_time=1.105, grad_norm=107.360, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.121, optim0_lr0=1.757e-04, train_time=1.717 +[gpub001:0/128] 2023-07-02 15:18:00,929 (trainer:732) INFO: 6epoch:train:301-400batch: iter_time=1.021e-04, forward_time=0.145, loss_ctc=91.913, loss_att=79.037, acc=0.636, loss=82.900, backward_time=1.090, grad_norm=110.113, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.121, optim0_lr0=1.752e-04, train_time=1.566 +[gpub001:0/128] 2023-07-02 15:18:17,251 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub001:0/128] 2023-07-02 15:18:39,584 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 15:18:43,772 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.8", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 15:18:43,772 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.8, +[gpub001:0/128] 2023-07-02 15:18:43,861 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 15:25:56,982 (trainer:732) INFO: 6epoch:train:401-500batch: iter_time=2.079, forward_time=0.167, loss_ctc=92.693, loss_att=72.766, acc=0.624, loss=78.744, backward_time=1.096, grad_norm=118.287, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.121, optim0_lr0=1.748e-04, train_time=4.760 +[gpub001:0/128] 2023-07-02 15:28:27,102 (trainer:732) INFO: 6epoch:train:501-600batch: iter_time=1.140e-04, forward_time=0.145, loss_ctc=81.911, loss_att=68.694, acc=0.611, loss=72.659, backward_time=1.081, grad_norm=97.202, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.121, optim0_lr0=1.744e-04, train_time=1.501 +[gpub001:0/128] 2023-07-02 15:30:56,188 (trainer:732) INFO: 6epoch:train:601-700batch: iter_time=1.145e-04, forward_time=0.146, loss_ctc=83.558, loss_att=64.117, acc=0.651, loss=69.949, backward_time=1.080, grad_norm=106.948, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.121, optim0_lr0=1.740e-04, train_time=1.491 +[gpub001:0/128] 2023-07-02 15:33:25,358 (trainer:732) INFO: 6epoch:train:701-800batch: iter_time=1.124e-04, forward_time=0.145, loss_ctc=87.949, loss_att=76.437, acc=0.632, loss=79.891, backward_time=1.082, grad_norm=108.261, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.121, optim0_lr0=1.735e-04, train_time=1.491 +[gpub001:0/128] 2023-07-02 15:33:27,210 (multiple_iter_factory:32) INFO: Building 2th iter-factory... 
+[gpub001:0/128] 2023-07-02 15:33:49,343 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 15:33:53,609 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.1", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 15:33:53,609 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.1, +[gpub001:0/128] 2023-07-02 15:33:53,613 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 15:40:08,819 (trainer:732) INFO: 6epoch:train:801-900batch: iter_time=1.543, forward_time=0.145, loss_ctc=89.210, loss_att=70.814, acc=0.647, loss=76.333, backward_time=1.102, grad_norm=134.895, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.121, optim0_lr0=1.731e-04, train_time=4.034 +[gpub001:0/128] 2023-07-02 15:42:38,135 (trainer:732) INFO: 6epoch:train:901-1000batch: iter_time=1.010e-04, forward_time=0.145, loss_ctc=82.434, loss_att=67.557, acc=0.627, loss=72.020, backward_time=1.083, grad_norm=95.861, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.121, optim0_lr0=1.727e-04, train_time=1.493 +[gpub001:0/128] 2023-07-02 15:45:07,723 (trainer:732) INFO: 6epoch:train:1001-1100batch: iter_time=1.084e-04, forward_time=0.145, loss_ctc=81.188, loss_att=62.844, acc=0.667, loss=68.347, backward_time=1.083, grad_norm=93.331, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.121, optim0_lr0=1.723e-04, train_time=1.496 +[gpub001:0/128] 2023-07-02 15:47:38,402 (trainer:732) INFO: 6epoch:train:1101-1200batch: iter_time=1.003e-04, forward_time=0.147, loss_ctc=88.287, loss_att=74.520, acc=0.648, loss=78.650, backward_time=1.086, grad_norm=111.618, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.121, optim0_lr0=1.719e-04, train_time=1.507 +[gpub001:0/128] 2023-07-02 15:47:48,256 (multiple_iter_factory:32) INFO: Building 3th iter-factory... 
+[gpub001:0/128] 2023-07-02 15:48:09,974 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 15:48:14,373 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.7", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 15:48:14,373 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.7, +[gpub001:0/128] 2023-07-02 15:48:14,377 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 15:54:23,324 (trainer:732) INFO: 6epoch:train:1201-1300batch: iter_time=1.601, forward_time=0.159, loss_ctc=88.348, loss_att=73.412, acc=0.647, loss=77.893, backward_time=1.101, grad_norm=112.423, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.121, optim0_lr0=1.715e-04, train_time=4.049 +[gpub001:0/128] 2023-07-02 15:56:53,521 (trainer:732) INFO: 6epoch:train:1301-1400batch: iter_time=1.244e-04, forward_time=0.146, loss_ctc=79.386, loss_att=65.071, acc=0.634, loss=69.365, backward_time=1.082, grad_norm=105.505, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.121, optim0_lr0=1.711e-04, train_time=1.502 +[gpub001:0/128] 2023-07-02 15:59:22,392 (trainer:732) INFO: 6epoch:train:1401-1500batch: iter_time=1.308e-04, forward_time=0.148, loss_ctc=81.481, loss_att=62.703, acc=0.665, loss=68.336, backward_time=1.081, grad_norm=101.416, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.121, optim0_lr0=1.707e-04, train_time=1.488 +[gpub001:0/128] 2023-07-02 16:01:52,855 (trainer:732) INFO: 6epoch:train:1501-1600batch: iter_time=1.191e-04, forward_time=0.148, loss_ctc=87.263, loss_att=73.197, acc=0.653, loss=77.416, backward_time=1.084, grad_norm=107.970, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.121, optim0_lr0=1.703e-04, train_time=1.504 +[gpub001:0/128] 2023-07-02 16:01:58,402 (multiple_iter_factory:32) INFO: Building 4th iter-factory... 
+[gpub001:0/128] 2023-07-02 16:02:20,429 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub001:0/128] 2023-07-02 16:02:24,729 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.5", "type": "text"} + preprocess: ) +[gpub001:0/128] 2023-07-02 16:02:24,729 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=22796, batch_size=256, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.5, +[gpub001:0/128] 2023-07-02 16:02:24,733 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=22796, mean=256.0, min=256, max=257 +[gpub001:0/128] 2023-07-02 16:07:49,090 (trainer:732) INFO: 6epoch:train:1601-1700batch: iter_time=1.671, forward_time=0.171, loss_ctc=90.466, loss_att=70.502, acc=0.651, loss=76.491, backward_time=1.106, grad_norm=124.483, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.123, optim0_lr0=1.699e-04, train_time=3.562 +[gpub001:0/128] 2023-07-02 16:10:20,476 (trainer:732) INFO: 6epoch:train:1701-1800batch: iter_time=9.879e-05, forward_time=0.146, loss_ctc=80.510, loss_att=64.788, acc=0.634, loss=69.505, backward_time=1.086, grad_norm=108.106, clip=100.000, loss_scale=6.711e+07, optim_step_time=0.121, optim0_lr0=1.695e-04, train_time=1.514 +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/kaldiio/utils.py:481: UserWarning: An error happens at loading "dump/raw/org/GigaST/XL.en-de/data/format.49/data_wav.ark:1438393521" + warnings.warn('An error happens at loading "{}"'.format(ark_name)) +ERROR:root:Error happened with path=exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.5, type=kaldi_ark, id=GigaST_YOU0000008013_005722080_005750350_en_st_de +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 510, in train_one_epoch + for iiter, (utt_id, batch) in enumerate( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/reporter.py", line 267, in measure_iter_time + retval = next(iterator) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/iterators/multiple_iter_factory.py", line 35, in build_iter + yield from iter_factory.build_iter(epoch, shuffle) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 628, in __next__ + data = self._next_data() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1333, in 
_next_data + return self._process_data(data) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1359, in _process_data + data.reraise() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_utils.py", line 543, in reraise + raise exception +PermissionError: Caught PermissionError in DataLoader worker process 1. +Original Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/utils/data/_utils/worker.py", line 302, in _worker_loop + data = fetcher.fetch(index) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 58, in fetch + data = [self.dataset[idx] for idx in possibly_batched_index] + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 58, in <listcomp> + data = [self.dataset[idx] for idx in possibly_batched_index] + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/dataset.py", line 513, in __getitem__ + value = loader[uid] + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/dataset.py", line 52, in __getitem__ + retval = self.loader[key] + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/kaldiio/utils.py", line 479, in __getitem__ + return self._loader(ark_name) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/kaldiio/matio.py", line 235, in load_mat + fd_dict[ark] = open_like_kaldi(ark, "rb") + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/kaldiio/utils.py", line 207, in open_like_kaldi + return io.open(name, mode, encoding=encoding) +PermissionError: [Errno 13] Permission denied: 'dump/raw/org/GigaST/XL.en-de/data/format.49/data_wav.ark' + +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main + return _run_code(code, main_globals, None, + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code + exec(code, run_globals) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module> + main() + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main + S2TTask.main(cmd=cmd) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main + while not ProcessContext(processes, error_queues).join(): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join + raise ProcessExitedException( +torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1 +srun: error: gpub040: task 14: Exited with exit code 1 +slurmstepd: error: *** STEP 2115302.0 ON gpub001 CANCELLED AT 2023-07-02T16:14:40 *** diff --git a/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.2.log b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.2.log new file mode 100644 index
0000000000000000000000000000000000000000..acb0d4eb57452bcc77640f5c9b58a42ec38842b2 --- /dev/null +++ b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.2.log @@ -0,0 +1,4663 @@ +# Running on gpub002.delta.ncsa.illinois.edu +# Started at Wed Jul 12 13:15:16 CDT 2023 +# SLURMD_NODENAME=gpub002 +# SLURM_CLUSTER_NAME=delta +# SLURM_CONF=/var/spool/slurmd/conf-cache/slurm.conf +# SLURM_CPUS_ON_NODE=64 +# SLURM_CPUS_PER_TASK=64 +# SLURM_EXPORT_ENV=PATH +# SLURM_GET_USER_ENV=1 +# SLURM_GPUS_ON_NODE=4 +# SLURM_GTIDS=0 +# SLURM_JOBID=2147805 +# SLURM_JOB_ACCOUNT=bbjs-delta-gpu +# SLURM_JOB_CPUS_PER_NODE='64(x16)' +# SLURM_JOB_GID=202 +# SLURM_JOB_GPUS=0,1,2,3 +# SLURM_JOB_ID=2147805 +# SLURM_JOB_NAME=exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.log +# SLURM_JOB_NODELIST='gpub[002,008,010-011,019,027-028,030,050-053,073-074,078,084]' +# SLURM_JOB_NUM_NODES=16 +# SLURM_JOB_PARTITION=gpuA40x4 +# SLURM_JOB_QOS=bbjs-delta-gpu +# SLURM_JOB_UID=68077 +# SLURM_JOB_USER=peng6 +# SLURM_LOCALID=0 +# SLURM_MEM_PER_NODE=240000 +# SLURM_NNODES=16 +# SLURM_NODEID=0 +# SLURM_NODELIST='gpub[002,008,010-011,019,027-028,030,050-053,073-074,078,084]' +# SLURM_NODE_ALIASES='(null)' +# SLURM_OPEN_MODE=a +# SLURM_PRIO_PROCESS=0 +# SLURM_PROCID=0 +# SLURM_SUBMIT_DIR=/scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1 +# SLURM_SUBMIT_HOST=dt-login02.delta.internal.ncsa.edu +# SLURM_TASKS_PER_NODE='1(x16)' +# SLURM_TASK_PID=2108111 +# SLURM_TOPOLOGY_ADDR=ss00.ss09.gpub002 +# SLURM_TOPOLOGY_ADDR_PATTERN=switch.switch.node +# SLURM_WORKING_CLUSTER=delta:dt-sched:6817:9728:109 +# srun --export=ALL python3 -m espnet2.bin.s2t_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text 
--valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_dea1b7df-6390-451b-bb9e-0e3133584ca1 +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_dea1b7df-6390-451b-bb9e-0e3133584ca1
--valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_dea1b7df-6390-451b-bb9e-0e3133584ca1 +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_dea1b7df-6390-451b-bb9e-0e3133584ca1 +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_dea1b7df-6390-451b-bb9e-0e3133584ca1 +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe 
--multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_dea1b7df-6390-451b-bb9e-0e3133584ca1 +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_dea1b7df-6390-451b-bb9e-0e3133584ca1 +[gpub002:0/64] 2023-07-12 13:18:48,677 (distributed_c10d:319) INFO: Added key: store_based_barrier_key:1 to store for rank: 0 +[gpub002:0/64] 2023-07-12 13:18:49,830 (distributed_c10d:353) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 64 nodes. 
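The two INFO lines above are torch.distributed completing its store-based rendezvous across all 64 ranks (16 nodes x 4 GPUs per node), using the shared-file init method passed via --dist_init_method. A minimal sketch of that initialization, assuming one process per GPU; the path and the rank arithmetic are illustrative placeholders, not ESPnet's actual launcher code:

    import os
    import torch.distributed as dist

    # Illustrative rank bookkeeping: this job runs one Slurm task per node
    # (SLURM_PROCID = 0..15), and ESPnet's --multiprocessing_distributed
    # spawns 4 local processes per node, one per GPU.
    node_rank = int(os.environ.get("SLURM_PROCID", "0"))
    local_rank = 0  # 0..3, assigned per spawned process
    rank = node_rank * 4 + local_rank

    # file:// rendezvous: every rank blocks here until all 64 have checked
    # in through the shared file (the "store-based barrier" logged above).
    dist.init_process_group(
        backend="nccl",  # assumption; the usual backend for multi-GPU training
        init_method="file:///path/on/shared/fs/.dist_init_example",  # placeholder
        world_size=64,
        rank=rank,
    )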
+[gpub002:0/64] 2023-07-12 13:18:49,864 (s2t:483) INFO: Vocabulary size: 50002 +[gpub002:0/64] 2023-07-12 13:19:04,645 (abs_task:1201) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True +[gpub002:0/64] 2023-07-12 13:19:04,653 (abs_task:1202) INFO: Model structure: +ESPnetS2TModel( + (frontend): DefaultFrontend( + (stft): Stft(n_fft=512, win_length=400, hop_length=160, center=True, normalized=False, onesided=True) + (frontend): Frontend() + (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False) + ) + (specaug): SpecAug( + (freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq) + (time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=10, axis=time) + ) + (normalize): GlobalMVN(stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz, norm_means=True, norm_vars=True) + (encoder): TransformerEncoder( + (embed): Conv2dSubsampling( + (conv): Sequential( + (0): Conv2d(1, 1024, kernel_size=(3, 3), stride=(2, 2)) + (1): ReLU() + (2): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(2, 2)) + (3): ReLU() + ) + (out): Sequential( + (0): Linear(in_features=19456, out_features=1024, bias=True) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + ) + (encoders): MultiSequential( + (0): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, 
inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, 
bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (9): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (10): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (11): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): 
Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (12): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (13): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (14): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (15): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): 
Dropout(p=0.1, inplace=False) + ) + (16): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (17): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (18): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (19): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (20): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): 
Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (21): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (22): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (23): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + ) + (decoder): TransformerDecoder( + (embed): Sequential( + (0): Embedding(50002, 1024) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (output_layer): Linear(in_features=1024, out_features=50002, bias=True) + (decoders): MultiSequential( + (0): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + 
(linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): DecoderLayer( + (self_attn): MultiHeadedAttention( + 
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), 
eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): 
LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (9): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (10): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (11): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + 
(w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (12): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (13): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (14): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): 
Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (15): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (16): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (17): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): 
Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (18): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (19): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (20): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (21): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (22): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (23): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+    )
+  )
+  (criterion_att): LabelSmoothingLoss(
+    (criterion): KLDivLoss()
+  )
+  (ctc): CTC(
+    (ctc_lo): Linear(in_features=1024, out_features=50002, bias=True)
+    (ctc_loss): CTCLoss()
+  )
+)
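Editor's note: the block repeated above (indices 0-23, i.e. the d24 in the experiment name) is a standard Transformer decoder layer: masked self-attention, cross-attention over the encoder output, and a 1024-4096-1024 feed-forward, each with LayerNorm (eps=1e-12) and dropout 0.1; the 50002-way ctc_lo presumably covers the bpe50000 vocabulary plus special symbols. As a reading aid, here is a minimal PyTorch sketch with the same printed shapes. This is a stand-in, not the ESPnet source (which fuses nothing and prints linear_q/k/v/out separately); the head count is not shown in this excerpt, so 16 is an assumption, as is the pre-LN residual ordering.

```python
# Minimal sketch of one decoder block matching the shapes logged above.
# Assumptions: 16 heads, pre-LN residual ordering (normalize_before=True).
import torch
import torch.nn as nn

class PositionwiseFeedForward(nn.Module):
    def __init__(self, d_model: int = 1024, d_ff: int = 4096, dropout: float = 0.1):
        super().__init__()
        self.w_1 = nn.Linear(d_model, d_ff)      # (w_1) in the dump
        self.w_2 = nn.Linear(d_ff, d_model)      # (w_2) in the dump
        self.dropout = nn.Dropout(dropout)
        self.activation = nn.ReLU()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.w_2(self.dropout(self.activation(self.w_1(x))))

class DecoderLayer(nn.Module):
    def __init__(self, d_model: int = 1024, n_heads: int = 16, dropout: float = 0.1):
        super().__init__()
        # nn.MultiheadAttention fuses the q/k/v/out projections that the log
        # prints as separate linear_q/linear_k/linear_v/linear_out modules.
        self.self_attn = nn.MultiheadAttention(d_model, n_heads, dropout=dropout, batch_first=True)
        self.src_attn = nn.MultiheadAttention(d_model, n_heads, dropout=dropout, batch_first=True)
        self.feed_forward = PositionwiseFeedForward(d_model, 4 * d_model, dropout)
        self.norm1 = nn.LayerNorm(d_model, eps=1e-12)
        self.norm2 = nn.LayerNorm(d_model, eps=1e-12)
        self.norm3 = nn.LayerNorm(d_model, eps=1e-12)
        self.dropout = nn.Dropout(dropout)

    def forward(self, tgt, memory, tgt_mask=None):
        # Masked self-attention over the target prefix.
        x = self.norm1(tgt)
        x = tgt + self.dropout(self.self_attn(x, x, x, attn_mask=tgt_mask, need_weights=False)[0])
        # Cross-attention over the encoder output ("src_attn" in the dump).
        y = self.norm2(x)
        x = x + self.dropout(self.src_attn(y, memory, memory, need_weights=False)[0])
        # Position-wise feed-forward.
        return x + self.dropout(self.feed_forward(self.norm3(x)))
```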
+
+Model summary:
+    Class Name: ESPnetS2TModel
+    Total Number of model parameters: 888.51 M
+    Number of trainable parameters: 888.51 M (100.0%)
+    Size: 3.55 GB
+    Type: torch.float32
+[gpub002:0/64] 2023-07-12 13:19:04,653 (abs_task:1205) INFO: Optimizer:
+AdamW (
+Parameter Group 0
+    amsgrad: False
+    betas: [0.9, 0.98]
+    capturable: False
+    eps: 1e-06
+    foreach: None
+    initial_lr: 0.00025
+    lr: 2.5e-08
+    maximize: False
+    weight_decay: 0.0
+)
+[gpub002:0/64] 2023-07-12 13:19:04,653 (abs_task:1206) INFO: Scheduler: WarmupLR(warmup_steps=10000)
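Editor's note: two quick consistency checks on the numbers above. The reported 3.55 GB is simply the float32 parameter count times 4 bytes (using 10^9 bytes per GB), and the printed lr of 2.5e-08 is what ESPnet's Noam-style WarmupLR (espnet2.schedulers.warmup_lr) yields at step 1 for the configured peak of 2.5e-4 with 10k warmup steps; re-deriving both is a cheap sanity check when resuming a run.

```python
# Pure arithmetic against the log lines above.
params = 888.51e6                      # "Total Number of model parameters: 888.51 M"
print(params * 4 / 1e9)                # float32 -> 3.554..., logged as "Size: 3.55 GB"

# WarmupLR schedule: lr(step) = base_lr * warmup**0.5 * min(step**-0.5, step * warmup**-1.5)
base_lr, warmup, step = 2.5e-4, 10_000, 1
print(base_lr * warmup**0.5 * min(step**-0.5, step * warmup**-1.5))  # 2.5e-08, as logged
```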
+[gpub002:0/64] 2023-07-12 13:19:04,667 (abs_task:1215) INFO: Saving the configuration in exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/config.yaml
+[gpub002:0/64] 2023-07-12 13:19:05,366 (abs_task:1272) INFO: Loading pretrained params from /scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v2/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e18_d18_lr5e-4_warmup20k_raw_bpe50000/valid.acc.ave.pth
+[gpub002:0/64] 2023-07-12 13:19:13,983 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-12 13:19:14,187 (abs_task:1570) INFO: [valid] dataset:
+ESPnetDataset(
+  speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"}
+  text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"}
+  text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"}
+  text: {"path": "dump/raw/dev/text", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-12 13:19:14,187 (abs_task:1571) INFO: [valid] Batch sampler: UnsortedBatchSampler(N-batch=1012, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape,
+[gpub002:0/64] 2023-07-12 13:19:14,194 (abs_task:1572) INFO: [valid] mini-batch sizes summary: N-batch=1012, mean=128.1, min=128, max=129
+[gpub002:0/64] 2023-07-12 13:19:14,680 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-12 13:19:15,003 (abs_task:1570) INFO: [plot_att] dataset:
+ESPnetDataset(
+  speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"}
+  text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"}
+  text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"}
+  text: {"path": "dump/raw/dev/text", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-12 13:19:15,003 (abs_task:1571) INFO: [plot_att] Batch sampler: UnsortedBatchSampler(N-batch=129591, batch_size=1, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape,
+[gpub002:0/64] 2023-07-12 13:19:15,003 (abs_task:1572) INFO: [plot_att] mini-batch sizes summary: N-batch=3, mean=1.0, min=1, max=1
+[gpub002:0/64] 2023-07-12 13:19:42,133 (trainer:159) INFO: The training was resumed using exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/checkpoint.pth
+gpub002:2108199:2108199 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.102<0>
+gpub002:2108199:2108199 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub002:2108199:2108199 [0] NCCL INFO cudaDriverVersion 12010
+NCCL version 2.14.3+cuda11.7
+[gpub002:0/64] 2023-07-12 13:19:47,191 (trainer:284) INFO: 40/50epoch started
+[gpub002:0/64] 2023-07-12 13:19:47,237 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub002:0/64] 2023-07-12 13:20:04,995 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-12 13:20:08,308 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-12 13:20:08,308 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub002:0/64] 2023-07-12 13:20:08,314 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
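Editor's note: the [valid] and [plot_att] samplers above are built from the same key_file, so their counts should reconcile: 129,591 dev utterances grouped into 1,012 batches of roughly 128. (The plot_att factory then reports only N-batch=3 because the trainer draws attention plots for just a handful of utterances; 3 matching the usual num_att_plot default is our reading, not stated in the log.) A two-line check:

```python
# Reconciling the sampler lines above: same key file, two batch sizes.
utts, batches = 129_591, 1_012           # from the [plot_att] and [valid] lines
print(utts / batches)                    # 128.054..., logged (rounded) as mean=128.1
print(utts - batches * 128)              # 55 batches carry one extra utterance -> max=129
```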
+gpub011:1718215:1718290 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub011:1718215:1718290 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub011:1718215:1718290 [0] NCCL INFO comm 0x8e227720 rank 12 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub011:1718216:1718216 [1] NCCL INFO cudaDriverVersion 12010 +gpub011:1718216:1718216 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.111<0> +gpub011:1718216:1718216 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub011:1718216:1718288 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.111<0> +gpub011:1718216:1718288 [1] NCCL INFO Using network IB +gpub011:1718216:1718288 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub011:1718216:1718288 [1] NCCL INFO Trees [0] 14/-1/-1->13->12 [1] 14/20/-1->13->12 +gpub011:1718216:1718288 [1] NCCL INFO Channel 00/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub011:1718216:1718288 [1] NCCL INFO Channel 01/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub011:1718216:1718288 [1] NCCL INFO Connected all rings +gpub011:1718216:1718288 [1] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [send] via NET/IB/0 +gpub011:1718216:1718288 [1] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [receive] via NET/IB/0 +gpub011:1718216:1718288 [1] NCCL INFO Channel 00/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub011:1718216:1718288 [1] NCCL INFO Channel 01/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub011:1718216:1718288 [1] NCCL INFO Connected all trees +gpub011:1718216:1718288 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub011:1718216:1718288 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub011:1718216:1718288 [1] NCCL INFO comm 0x9d351fa0 rank 13 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub011:1718218:1718218 [3] NCCL INFO cudaDriverVersion 12010 +gpub011:1718218:1718218 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.111<0> +gpub011:1718218:1718218 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub011:1718218:1718289 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.111<0> +gpub011:1718218:1718289 [3] NCCL INFO Using network IB +gpub011:1718218:1718289 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub011:1718218:1718289 [3] NCCL INFO Trees [0] -1/-1/-1->15->14 [1] -1/-1/-1->15->14 +gpub011:1718218:1718289 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0 +gpub011:1718218:1718289 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0 +gpub011:1718218:1718289 [3] NCCL INFO Connected all rings +gpub011:1718218:1718289 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub011:1718218:1718289 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub011:1718218:1718289 [3] NCCL INFO Connected all trees +gpub011:1718218:1718289 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub011:1718218:1718289 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub011:1718218:1718289 [3] NCCL INFO comm 0x4fae7090 rank 15 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub084:95632:95632 [3] NCCL INFO cudaDriverVersion 12010 +gpub084:95632:95632 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.184<0> +gpub084:95632:95632 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub084:95632:95714 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.184<0> 
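Editor's note: every rank logs the same init recipe, so the blocks that follow can be skimmed: pick the IB/RoCE transport, pin CPU affinity, wire two channels of rings and trees, then report "Init COMPLETE" with its rank out of nranks 64 (16 nodes x 4 GPUs). The "Trees" entries appear to read children->rank->parent with -1 meaning "none", which is consistent with rank 12 above linking to 13, 8, 4 and 28. A throwaway helper for eyeballing the topology; the parser is ours and assumes only the line format shown here:

```python
# Parse per-channel tree entries out of lines like
#   "... NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->28"
import re

TREE = re.compile(r"\[(\d+)\] (-?\d+)/(-?\d+)/(-?\d+)->(-?\d+)->(-?\d+)")

def parse_trees(line: str) -> dict:
    out = {}
    for ch, c1, c2, c3, rank, parent in TREE.findall(line):
        children = [int(c) for c in (c1, c2, c3) if int(c) != -1]
        out[int(ch)] = {"rank": int(rank), "parent": int(parent), "children": children}
    return out

line = "gpub011:1718215:1718290 [0] NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->28"
print(parse_trees(line))
# {0: {'rank': 12, 'parent': 8, 'children': [13]},
#  1: {'rank': 12, 'parent': 28, 'children': [13, 4]}}
```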
+gpub084:95632:95714 [3] NCCL INFO Using network IB +gpub084:95632:95714 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub084:95632:95714 [3] NCCL INFO Trees [0] -1/-1/-1->63->62 [1] -1/-1/-1->63->62 +gpub084:95632:95714 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0 +gpub084:95632:95714 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0 +gpub084:95632:95714 [3] NCCL INFO Connected all rings +gpub084:95632:95714 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 62[85000] via P2P/IPC +gpub084:95632:95714 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 62[85000] via P2P/IPC +gpub084:95632:95714 [3] NCCL INFO Connected all trees +gpub084:95632:95714 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub084:95632:95714 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub084:95632:95714 [3] NCCL INFO comm 0x9d28050 rank 63 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub011:1718217:1718217 [2] NCCL INFO cudaDriverVersion 12010 +gpub011:1718217:1718217 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.111<0> +gpub011:1718217:1718217 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub011:1718217:1718291 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.111<0> +gpub011:1718217:1718291 [2] NCCL INFO Using network IB +gpub011:1718217:1718291 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub011:1718217:1718291 [2] NCCL INFO Trees [0] 15/-1/-1->14->13 [1] 15/-1/-1->14->13 +gpub011:1718217:1718291 [2] NCCL INFO Channel 00/0 : 14[85000] -> 15[c7000] via P2P/IPC +gpub011:1718217:1718291 [2] NCCL INFO Channel 01/0 : 14[85000] -> 15[c7000] via P2P/IPC +gpub011:1718217:1718291 [2] NCCL INFO Connected all rings +gpub011:1718217:1718291 [2] NCCL INFO Channel 00/0 : 14[85000] -> 13[46000] via P2P/IPC +gpub011:1718217:1718291 [2] NCCL INFO Channel 01/0 : 14[85000] -> 13[46000] via P2P/IPC +gpub011:1718217:1718291 [2] NCCL INFO Connected all trees +gpub011:1718217:1718291 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub011:1718217:1718291 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub011:1718217:1718291 [2] NCCL INFO comm 0x50a009a0 rank 14 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub019:2611991:2611991 [2] NCCL INFO cudaDriverVersion 12010 +gpub019:2611991:2611991 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.119<0> +gpub019:2611991:2611991 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub019:2611991:2612065 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.119<0> +gpub019:2611991:2612065 [2] NCCL INFO Using network IB +gpub019:2611991:2612065 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub019:2611991:2612065 [2] NCCL INFO Trees [0] 19/-1/-1->18->17 [1] 19/-1/-1->18->17 +gpub019:2611991:2612065 [2] NCCL INFO Channel 00/0 : 18[85000] -> 19[c7000] via P2P/IPC +gpub019:2611991:2612065 [2] NCCL INFO Channel 01/0 : 18[85000] -> 19[c7000] via P2P/IPC +gpub019:2611991:2612065 [2] NCCL INFO Connected all rings +gpub019:2611991:2612065 [2] NCCL INFO Channel 00/0 : 18[85000] -> 17[46000] via P2P/IPC +gpub019:2611991:2612065 [2] NCCL INFO Channel 01/0 : 18[85000] -> 17[46000] via P2P/IPC +gpub019:2611991:2612065 [2] NCCL INFO Connected all trees +gpub019:2611991:2612065 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub019:2611991:2612065 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer 
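Editor's note: the "Setting affinity" masks above look like comma-separated 32-bit hex words (most significant word first, one bit per CPU), in which case each of the four local GPUs is pinned to its own 16-core block of the node's 64 cores. A small decoding sketch under that assumption; the helper is ours, not NCCL's:

```python
# Decode cpuset masks such as "ffff0000,00000000" into CPU indices.
def cpus_from_mask(mask: str) -> list[int]:
    bits = int(mask.replace(",", ""), 16)      # treat words as one big integer
    return [i for i in range(bits.bit_length()) if bits >> i & 1]

print(cpus_from_mask("ffff"))                  # GPU 3 -> CPUs 0-15
print(cpus_from_mask("ffff0000"))              # GPU 2 -> CPUs 16-31
print(cpus_from_mask("ffff,00000000"))         # GPU 1 -> CPUs 32-47
print(cpus_from_mask("ffff0000,00000000"))     # GPU 0 -> CPUs 48-63
```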
+gpub019:2611991:2612065 [2] NCCL INFO comm 0x10048ab0 rank 18 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub084:95631:95631 [2] NCCL INFO cudaDriverVersion 12010 +gpub084:95631:95631 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.184<0> +gpub084:95631:95631 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub084:95631:95712 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.184<0> +gpub084:95631:95712 [2] NCCL INFO Using network IB +gpub084:95631:95712 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub084:95631:95712 [2] NCCL INFO Trees [0] 63/-1/-1->62->61 [1] 63/-1/-1->62->61 +gpub084:95631:95712 [2] NCCL INFO Channel 00/0 : 62[85000] -> 63[c7000] via P2P/IPC +gpub084:95631:95712 [2] NCCL INFO Channel 01/0 : 62[85000] -> 63[c7000] via P2P/IPC +gpub084:95631:95712 [2] NCCL INFO Connected all rings +gpub084:95631:95712 [2] NCCL INFO Channel 00/0 : 62[85000] -> 61[46000] via P2P/IPC +gpub084:95631:95712 [2] NCCL INFO Channel 01/0 : 62[85000] -> 61[46000] via P2P/IPC +gpub084:95631:95712 [2] NCCL INFO Connected all trees +gpub084:95631:95712 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub084:95631:95712 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub084:95631:95712 [2] NCCL INFO comm 0x940c750 rank 62 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub074:3855653:3855653 [1] NCCL INFO cudaDriverVersion 12010 +gpub074:3855653:3855653 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0> +gpub074:3855653:3855653 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub074:3855653:3855727 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0> +gpub074:3855653:3855727 [1] NCCL INFO Using network IB +gpub074:3855653:3855727 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub074:3855653:3855727 [1] NCCL INFO Trees [0] 54/-1/-1->53->52 [1] 54/56/-1->53->52 +gpub074:3855653:3855727 [1] NCCL INFO Channel 00/0 : 53[46000] -> 54[85000] via P2P/IPC +gpub074:3855653:3855727 [1] NCCL INFO Channel 01/0 : 53[46000] -> 54[85000] via P2P/IPC +gpub074:3855653:3855727 [1] NCCL INFO Connected all rings +gpub074:3855653:3855727 [1] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [send] via NET/IB/0 +gpub074:3855653:3855727 [1] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [receive] via NET/IB/0 +gpub074:3855653:3855727 [1] NCCL INFO Channel 00/0 : 53[46000] -> 52[7000] via P2P/IPC +gpub074:3855653:3855727 [1] NCCL INFO Channel 01/0 : 53[46000] -> 52[7000] via P2P/IPC +gpub074:3855653:3855727 [1] NCCL INFO Connected all trees +gpub074:3855653:3855727 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub074:3855653:3855727 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub074:3855653:3855727 [1] NCCL INFO comm 0xaa1acf00 rank 53 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub074:3855655:3855655 [3] NCCL INFO cudaDriverVersion 12010 +gpub074:3855655:3855655 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0> +gpub074:3855655:3855655 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub074:3855655:3855725 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0> +gpub074:3855655:3855725 [3] NCCL INFO Using network IB +gpub074:3855655:3855725 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub074:3855655:3855725 [3] NCCL INFO Trees [0] -1/-1/-1->55->54 [1] -1/-1/-1->55->54 +gpub074:3855655:3855725 [3] NCCL 
INFO Channel 00/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0 +gpub074:3855655:3855725 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0 +gpub074:3855655:3855725 [3] NCCL INFO Connected all rings +gpub074:3855655:3855725 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 54[85000] via P2P/IPC +gpub074:3855655:3855725 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 54[85000] via P2P/IPC +gpub074:3855655:3855725 [3] NCCL INFO Connected all trees +gpub074:3855655:3855725 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub074:3855655:3855725 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub074:3855655:3855725 [3] NCCL INFO comm 0x509a28d0 rank 55 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub074:3855652:3855652 [0] NCCL INFO cudaDriverVersion 12010 +gpub074:3855652:3855652 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0> +gpub074:3855652:3855652 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub074:3855652:3855726 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0> +gpub074:3855652:3855726 [0] NCCL INFO Using network IB +gpub074:3855652:3855726 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub074:3855652:3855726 [0] NCCL INFO Trees [0] 53/-1/-1->52->57 [1] 53/48/-1->52->45 +gpub074:3855652:3855726 [0] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0 +gpub074:3855652:3855726 [0] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0 +gpub074:3855652:3855726 [0] NCCL INFO Channel 00/0 : 52[7000] -> 53[46000] via P2P/IPC +gpub074:3855652:3855726 [0] NCCL INFO Channel 01/0 : 52[7000] -> 53[46000] via P2P/IPC +gpub074:3855652:3855726 [0] NCCL INFO Connected all rings +gpub074:3855652:3855726 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [receive] via NET/IB/0 +gpub074:3855652:3855726 [0] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [send] via NET/IB/0 +gpub074:3855652:3855726 [0] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [receive] via NET/IB/0 +gpub074:3855652:3855726 [0] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [send] via NET/IB/0 +gpub074:3855652:3855726 [0] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [receive] via NET/IB/0 +gpub074:3855652:3855726 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [send] via NET/IB/0 +gpub074:3855652:3855726 [0] NCCL INFO Connected all trees +gpub074:3855652:3855726 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub074:3855652:3855726 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub074:3855652:3855726 [0] NCCL INFO comm 0x8e164a10 rank 52 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub019:2611989:2611989 [0] NCCL INFO cudaDriverVersion 12010 +gpub019:2611989:2611989 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.119<0> +gpub019:2611989:2611989 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub019:2611989:2612066 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.119<0> +gpub019:2611989:2612066 [0] NCCL INFO Using network IB +gpub019:2611989:2612066 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub019:2611989:2612066 [0] NCCL INFO Trees [0] 17/24/-1->16->33 [1] 17/-1/-1->16->20 +gpub019:2611989:2612066 [0] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0 +gpub019:2611989:2612066 [0] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0 +gpub019:2611989:2612066 [0] NCCL INFO Channel 00/0 : 
16[7000] -> 17[46000] via P2P/IPC +gpub019:2611989:2612066 [0] NCCL INFO Channel 01/0 : 16[7000] -> 17[46000] via P2P/IPC +gpub019:2611989:2612066 [0] NCCL INFO Connected all rings +gpub019:2611989:2612066 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [send] via NET/IB/0 +gpub019:2611989:2612066 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [send] via NET/IB/0 +gpub019:2611989:2612066 [0] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [send] via NET/IB/0 +gpub019:2611989:2612066 [0] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [receive] via NET/IB/0 +gpub019:2611989:2612066 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [receive] via NET/IB/0 +gpub019:2611989:2612066 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [receive] via NET/IB/0 +gpub019:2611989:2612066 [0] NCCL INFO Connected all trees +gpub019:2611989:2612066 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub019:2611989:2612066 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub019:2611989:2612066 [0] NCCL INFO comm 0xa8ee89f0 rank 16 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub019:2611992:2611992 [3] NCCL INFO cudaDriverVersion 12010 +gpub019:2611992:2611992 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.119<0> +gpub019:2611992:2611992 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub019:2611992:2612064 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.119<0> +gpub019:2611992:2612064 [3] NCCL INFO Using network IB +gpub019:2611992:2612064 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub019:2611992:2612064 [3] NCCL INFO Trees [0] -1/-1/-1->19->18 [1] -1/-1/-1->19->18 +gpub019:2611992:2612064 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0 +gpub019:2611992:2612064 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0 +gpub019:2611992:2612064 [3] NCCL INFO Connected all rings +gpub019:2611992:2612064 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 18[85000] via P2P/IPC +gpub019:2611992:2612064 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 18[85000] via P2P/IPC +gpub019:2611992:2612064 [3] NCCL INFO Connected all trees +gpub019:2611992:2612064 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub019:2611992:2612064 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub019:2611992:2612064 [3] NCCL INFO comm 0x4fcf2500 rank 19 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub028:3104067:3104067 [0] NCCL INFO cudaDriverVersion 12010 +gpub028:3104067:3104067 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.128<0> +gpub028:3104067:3104067 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub028:3104067:3104152 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.128<0> +gpub028:3104067:3104152 [0] NCCL INFO Using network IB +gpub028:3104067:3104152 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub028:3104067:3104152 [0] NCCL INFO Trees [0] 25/28/-1->24->16 [1] 25/-1/-1->24->21 +gpub028:3104067:3104152 [0] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0 +gpub028:3104067:3104152 [0] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0 +gpub028:3104067:3104152 [0] NCCL INFO Channel 00/0 : 24[7000] -> 25[46000] via P2P/IPC +gpub028:3104067:3104152 [0] NCCL INFO Channel 01/0 : 24[7000] -> 25[46000] via P2P/IPC +gpub028:3104067:3104152 [0] NCCL INFO Connected all rings +gpub008:2789793:2789793 [0] NCCL INFO cudaDriverVersion 
12010 +gpub008:2789793:2789793 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.108<0> +gpub008:2789793:2789793 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub008:2789793:2789871 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.108<0> +gpub008:2789793:2789871 [0] NCCL INFO Using network IB +gpub008:2789793:2789871 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub008:2789793:2789871 [0] NCCL INFO Trees [0] 5/-1/-1->4->9 [1] 5/0/-1->4->12 +gpub008:2789793:2789871 [0] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0 +gpub008:2789793:2789871 [0] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0 +gpub008:2789793:2789871 [0] NCCL INFO Channel 00/0 : 4[7000] -> 5[46000] via P2P/IPC +gpub008:2789793:2789871 [0] NCCL INFO Channel 01/0 : 4[7000] -> 5[46000] via P2P/IPC +gpub008:2789793:2789871 [0] NCCL INFO Connected all rings +gpub028:3104067:3104152 [0] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [receive] via NET/IB/0 +gpub028:3104067:3104152 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [send] via NET/IB/0 +gpub028:3104067:3104152 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [receive] via NET/IB/0 +gpub028:3104067:3104152 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [send] via NET/IB/0 +gpub028:3104067:3104152 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [receive] via NET/IB/0 +gpub028:3104067:3104152 [0] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [send] via NET/IB/0 +gpub028:3104067:3104152 [0] NCCL INFO Connected all trees +gpub028:3104067:3104152 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub028:3104067:3104152 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub028:3104067:3104152 [0] NCCL INFO comm 0xa17fea0 rank 24 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub008:2789793:2789871 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [receive] via NET/IB/0 +gpub008:2789793:2789871 [0] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [send] via NET/IB/0 +gpub008:2789793:2789871 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [send] via NET/IB/0 +gpub008:2789793:2789871 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [receive] via NET/IB/0 +gpub008:2789793:2789871 [0] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [receive] via NET/IB/0 +gpub008:2789793:2789871 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [send] via NET/IB/0 +gpub008:2789793:2789871 [0] NCCL INFO Connected all trees +gpub008:2789793:2789871 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub008:2789793:2789871 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub008:2789793:2789871 [0] NCCL INFO comm 0x9e41e050 rank 4 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub010:1746407:1746407 [0] NCCL INFO cudaDriverVersion 12010 +gpub010:1746407:1746407 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.110<0> +gpub010:1746407:1746407 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub010:1746407:1746486 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.110<0> +gpub010:1746407:1746486 [0] NCCL INFO Using network IB +gpub010:1746407:1746486 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub010:1746407:1746486 [0] NCCL INFO Trees [0] 9/12/-1->8->17 [1] 9/-1/-1->8->5 +gpub010:1746407:1746486 [0] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0 +gpub010:1746407:1746486 [0] NCCL INFO Channel 01/0 : 7[c7000] -> 
8[7000] [receive] via NET/IB/0 +gpub010:1746407:1746486 [0] NCCL INFO Channel 00/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub010:1746407:1746486 [0] NCCL INFO Channel 01/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub010:1746407:1746486 [0] NCCL INFO Connected all rings +gpub010:1746407:1746486 [0] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [receive] via NET/IB/0 +gpub010:1746407:1746486 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [send] via NET/IB/0 +gpub010:1746407:1746486 [0] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [send] via NET/IB/0 +gpub010:1746407:1746486 [0] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [receive] via NET/IB/0 +gpub010:1746407:1746486 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [receive] via NET/IB/0 +gpub010:1746407:1746486 [0] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [send] via NET/IB/0 +gpub010:1746407:1746486 [0] NCCL INFO Connected all trees +gpub010:1746407:1746486 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub010:1746407:1746486 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub010:1746407:1746486 [0] NCCL INFO comm 0xa1f0110 rank 8 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub010:1746410:1746410 [3] NCCL INFO cudaDriverVersion 12010 +gpub010:1746410:1746410 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.110<0> +gpub010:1746410:1746410 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub010:1746410:1746485 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.110<0> +gpub010:1746410:1746485 [3] NCCL INFO Using network IB +gpub010:1746410:1746485 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub010:1746410:1746485 [3] NCCL INFO Trees [0] -1/-1/-1->11->10 [1] -1/-1/-1->11->10 +gpub010:1746410:1746485 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0 +gpub010:1746410:1746485 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0 +gpub010:1746410:1746485 [3] NCCL INFO Connected all rings +gpub010:1746410:1746485 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 10[85000] via P2P/IPC +gpub010:1746410:1746485 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 10[85000] via P2P/IPC +gpub010:1746410:1746485 [3] NCCL INFO Connected all trees +gpub010:1746410:1746485 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub010:1746410:1746485 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub010:1746410:1746485 [3] NCCL INFO comm 0x95b8eb50 rank 11 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub027:3834396:3834396 [0] NCCL INFO cudaDriverVersion 12010 +gpub027:3834396:3834396 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.127<0> +gpub027:3834396:3834396 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub027:3834396:3834476 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.127<0> +gpub027:3834396:3834476 [0] NCCL INFO Using network IB +gpub027:3834396:3834476 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub027:3834396:3834476 [0] NCCL INFO Trees [0] 21/-1/-1->20->25 [1] 21/16/-1->20->13 +gpub027:3834396:3834476 [0] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0 +gpub027:3834396:3834476 [0] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0 +gpub027:3834396:3834476 [0] NCCL INFO Channel 00/0 : 20[7000] -> 21[46000] via P2P/IPC +gpub027:3834396:3834476 [0] NCCL INFO Channel 01/0 : 20[7000] -> 21[46000] via P2P/IPC +gpub027:3834396:3834476 [0] NCCL INFO 
Connected all rings +gpub027:3834396:3834476 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [receive] via NET/IB/0 +gpub027:3834396:3834476 [0] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [send] via NET/IB/0 +gpub027:3834396:3834476 [0] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [receive] via NET/IB/0 +gpub027:3834396:3834476 [0] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [send] via NET/IB/0 +gpub027:3834396:3834476 [0] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [receive] via NET/IB/0 +gpub027:3834396:3834476 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [send] via NET/IB/0 +gpub027:3834396:3834476 [0] NCCL INFO Connected all trees +gpub027:3834396:3834476 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub027:3834396:3834476 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub027:3834396:3834476 [0] NCCL INFO comm 0x8b8afd50 rank 20 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub019:2611990:2611990 [1] NCCL INFO cudaDriverVersion 12010 +gpub019:2611990:2611990 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.119<0> +gpub019:2611990:2611990 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub019:2611990:2612063 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.119<0> +gpub019:2611990:2612063 [1] NCCL INFO Using network IB +gpub019:2611990:2612063 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub019:2611990:2612063 [1] NCCL INFO Trees [0] 18/8/-1->17->16 [1] 18/-1/-1->17->16 +gpub019:2611990:2612063 [1] NCCL INFO Channel 00/0 : 17[46000] -> 18[85000] via P2P/IPC +gpub019:2611990:2612063 [1] NCCL INFO Channel 01/0 : 17[46000] -> 18[85000] via P2P/IPC +gpub019:2611990:2612063 [1] NCCL INFO Connected all rings +gpub019:2611990:2612063 [1] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [receive] via NET/IB/0 +gpub019:2611990:2612063 [1] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [send] via NET/IB/0 +gpub019:2611990:2612063 [1] NCCL INFO Channel 00/0 : 17[46000] -> 16[7000] via P2P/IPC +gpub019:2611990:2612063 [1] NCCL INFO Channel 01/0 : 17[46000] -> 16[7000] via P2P/IPC +gpub019:2611990:2612063 [1] NCCL INFO Connected all trees +gpub019:2611990:2612063 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub019:2611990:2612063 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub019:2611990:2612063 [1] NCCL INFO comm 0x8916a60 rank 17 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub030:2867869:2867869 [0] NCCL INFO cudaDriverVersion 12010 +gpub030:2867869:2867869 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.130<0> +gpub030:2867869:2867869 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub030:2867869:2867948 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.130<0> +gpub030:2867869:2867948 [0] NCCL INFO Using network IB +gpub030:2867869:2867948 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub030:2867869:2867948 [0] NCCL INFO Trees [0] 29/-1/-1->28->24 [1] 29/12/-1->28->60 +gpub030:2867869:2867948 [0] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0 +gpub030:2867869:2867948 [0] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0 +gpub030:2867869:2867948 [0] NCCL INFO Channel 00/0 : 28[7000] -> 29[46000] via P2P/IPC +gpub030:2867869:2867948 [0] NCCL INFO Channel 01/0 : 28[7000] -> 29[46000] via P2P/IPC +gpub030:2867869:2867948 [0] NCCL INFO Connected all rings +gpub030:2867869:2867948 [0] NCCL 
INFO Channel 00/0 : 24[7000] -> 28[7000] [receive] via NET/IB/0 +gpub030:2867869:2867948 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [receive] via NET/IB/0 +gpub030:2867869:2867948 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [receive] via NET/IB/0 +gpub030:2867869:2867948 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [send] via NET/IB/0 +gpub030:2867869:2867948 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [send] via NET/IB/0 +gpub030:2867869:2867948 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [send] via NET/IB/0 +gpub030:2867869:2867948 [0] NCCL INFO Connected all trees +gpub030:2867869:2867948 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub030:2867869:2867948 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub030:2867869:2867948 [0] NCCL INFO comm 0x236c1590 rank 28 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub074:3855654:3855654 [2] NCCL INFO cudaDriverVersion 12010 +gpub074:3855654:3855654 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0> +gpub074:3855654:3855654 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub074:3855654:3855724 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0> +gpub074:3855654:3855724 [2] NCCL INFO Using network IB +gpub074:3855654:3855724 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub074:3855654:3855724 [2] NCCL INFO Trees [0] 55/-1/-1->54->53 [1] 55/-1/-1->54->53 +gpub074:3855654:3855724 [2] NCCL INFO Channel 00/0 : 54[85000] -> 55[c7000] via P2P/IPC +gpub074:3855654:3855724 [2] NCCL INFO Channel 01/0 : 54[85000] -> 55[c7000] via P2P/IPC +gpub074:3855654:3855724 [2] NCCL INFO Connected all rings +gpub074:3855654:3855724 [2] NCCL INFO Channel 00/0 : 54[85000] -> 53[46000] via P2P/IPC +gpub074:3855654:3855724 [2] NCCL INFO Channel 01/0 : 54[85000] -> 53[46000] via P2P/IPC +gpub074:3855654:3855724 [2] NCCL INFO Connected all trees +gpub074:3855654:3855724 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub074:3855654:3855724 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub074:3855654:3855724 [2] NCCL INFO comm 0xba937820 rank 54 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub030:2867872:2867872 [3] NCCL INFO cudaDriverVersion 12010 +gpub030:2867872:2867872 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.130<0> +gpub030:2867872:2867872 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub030:2867872:2867950 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.130<0> +gpub030:2867872:2867950 [3] NCCL INFO Using network IB +gpub030:2867872:2867950 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub030:2867872:2867950 [3] NCCL INFO Trees [0] -1/-1/-1->31->30 [1] -1/-1/-1->31->30 +gpub030:2867872:2867950 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0 +gpub030:2867872:2867950 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0 +gpub030:2867872:2867950 [3] NCCL INFO Connected all rings +gpub030:2867872:2867950 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 30[85000] via P2P/IPC +gpub030:2867872:2867950 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 30[85000] via P2P/IPC +gpub030:2867872:2867950 [3] NCCL INFO Connected all trees +gpub030:2867872:2867950 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub030:2867872:2867950 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub030:2867872:2867950 [3] NCCL INFO comm 0x8db50450 rank 
31 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub078:387633:387633 [0] NCCL INFO cudaDriverVersion 12010 +gpub078:387633:387633 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.178<0> +gpub078:387633:387633 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub078:387633:387710 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.178<0> +gpub078:387633:387710 [0] NCCL INFO Using network IB +gpub078:387633:387710 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub078:387633:387710 [0] NCCL INFO Trees [0] 57/60/-1->56->48 [1] 57/-1/-1->56->53 +gpub078:387633:387710 [0] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0 +gpub078:387633:387710 [0] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0 +gpub078:387633:387710 [0] NCCL INFO Channel 00/0 : 56[7000] -> 57[46000] via P2P/IPC +gpub078:387633:387710 [0] NCCL INFO Channel 01/0 : 56[7000] -> 57[46000] via P2P/IPC +gpub078:387633:387710 [0] NCCL INFO Connected all rings +gpub078:387633:387710 [0] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [receive] via NET/IB/0 +gpub078:387633:387710 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [send] via NET/IB/0 +gpub078:387633:387710 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [receive] via NET/IB/0 +gpub078:387633:387710 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [send] via NET/IB/0 +gpub078:387633:387710 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [receive] via NET/IB/0 +gpub078:387633:387710 [0] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [send] via NET/IB/0 +gpub078:387633:387710 [0] NCCL INFO Connected all trees +gpub078:387633:387710 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub078:387633:387710 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub078:387633:387710 [0] NCCL INFO comm 0x8b083970 rank 56 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub030:2867870:2867870 [1] NCCL INFO cudaDriverVersion 12010 +gpub030:2867870:2867870 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.130<0> +gpub030:2867870:2867870 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub030:2867870:2867949 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.130<0> +gpub030:2867870:2867949 [1] NCCL INFO Using network IB +gpub030:2867870:2867949 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub030:2867870:2867949 [1] NCCL INFO Trees [0] 30/-1/-1->29->28 [1] 30/44/-1->29->28 +gpub030:2867870:2867949 [1] NCCL INFO Channel 00/0 : 29[46000] -> 30[85000] via P2P/IPC +gpub030:2867870:2867949 [1] NCCL INFO Channel 01/0 : 29[46000] -> 30[85000] via P2P/IPC +gpub030:2867870:2867949 [1] NCCL INFO Connected all rings +gpub030:2867870:2867949 [1] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [send] via NET/IB/0 +gpub030:2867870:2867949 [1] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [receive] via NET/IB/0 +gpub030:2867870:2867949 [1] NCCL INFO Channel 00/0 : 29[46000] -> 28[7000] via P2P/IPC +gpub030:2867870:2867949 [1] NCCL INFO Channel 01/0 : 29[46000] -> 28[7000] via P2P/IPC +gpub030:2867870:2867949 [1] NCCL INFO Connected all trees +gpub030:2867870:2867949 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub030:2867870:2867949 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub030:2867870:2867949 [1] NCCL INFO comm 0x9c29c010 rank 29 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub008:2789796:2789796 [3] NCCL INFO 
cudaDriverVersion 12010 +gpub008:2789796:2789796 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.108<0> +gpub008:2789796:2789796 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub008:2789796:2789872 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.108<0> +gpub008:2789796:2789872 [3] NCCL INFO Using network IB +gpub008:2789796:2789872 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub008:2789796:2789872 [3] NCCL INFO Trees [0] -1/-1/-1->7->6 [1] -1/-1/-1->7->6 +gpub008:2789796:2789872 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0 +gpub008:2789796:2789872 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0 +gpub008:2789796:2789872 [3] NCCL INFO Connected all rings +gpub008:2789796:2789872 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 6[85000] via P2P/IPC +gpub008:2789796:2789872 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 6[85000] via P2P/IPC +gpub008:2789796:2789872 [3] NCCL INFO Connected all trees +gpub008:2789796:2789872 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub008:2789796:2789872 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub008:2789796:2789872 [3] NCCL INFO comm 0x50597af0 rank 7 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub053:2037082:2037082 [0] NCCL INFO cudaDriverVersion 12010 +gpub053:2037082:2037082 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.153<0> +gpub053:2037082:2037082 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub053:2037082:2037160 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.153<0> +gpub053:2037082:2037160 [0] NCCL INFO Using network IB +gpub053:2037082:2037160 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub053:2037082:2037160 [0] NCCL INFO Trees [0] 45/-1/-1->44->40 [1] 45/36/-1->44->29 +gpub053:2037082:2037160 [0] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0 +gpub053:2037082:2037160 [0] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0 +gpub053:2037082:2037160 [0] NCCL INFO Channel 00/0 : 44[7000] -> 45[46000] via P2P/IPC +gpub053:2037082:2037160 [0] NCCL INFO Channel 01/0 : 44[7000] -> 45[46000] via P2P/IPC +gpub053:2037082:2037160 [0] NCCL INFO Connected all rings +gpub053:2037082:2037160 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [receive] via NET/IB/0 +gpub053:2037082:2037160 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [receive] via NET/IB/0 +gpub053:2037082:2037160 [0] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [receive] via NET/IB/0 +gpub053:2037082:2037160 [0] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [send] via NET/IB/0 +gpub053:2037082:2037160 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [send] via NET/IB/0 +gpub053:2037082:2037160 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [send] via NET/IB/0 +gpub053:2037082:2037160 [0] NCCL INFO Connected all trees +gpub053:2037082:2037160 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub053:2037082:2037160 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub053:2037082:2037160 [0] NCCL INFO comm 0x50aa6090 rank 44 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub027:3834399:3834399 [3] NCCL INFO cudaDriverVersion 12010 +gpub027:3834399:3834399 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.127<0> +gpub027:3834399:3834399 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub027:3834399:3834474 [3] NCCL 
INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.127<0> +gpub027:3834399:3834474 [3] NCCL INFO Using network IB +gpub027:3834399:3834474 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub027:3834399:3834474 [3] NCCL INFO Trees [0] -1/-1/-1->23->22 [1] -1/-1/-1->23->22 +gpub027:3834399:3834474 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0 +gpub027:3834399:3834474 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0 +gpub027:3834399:3834474 [3] NCCL INFO Connected all rings +gpub027:3834399:3834474 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 22[85000] via P2P/IPC +gpub027:3834399:3834474 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 22[85000] via P2P/IPC +gpub027:3834399:3834474 [3] NCCL INFO Connected all trees +gpub027:3834399:3834474 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub027:3834399:3834474 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub027:3834399:3834474 [3] NCCL INFO comm 0x8f1f3890 rank 23 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub053:2037083:2037083 [1] NCCL INFO cudaDriverVersion 12010 +gpub053:2037083:2037083 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.153<0> +gpub053:2037083:2037083 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub053:2037083:2037161 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.153<0> +gpub053:2037083:2037161 [1] NCCL INFO Using network IB +gpub053:2037083:2037161 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub053:2037083:2037161 [1] NCCL INFO Trees [0] 46/-1/-1->45->44 [1] 46/52/-1->45->44 +gpub053:2037083:2037161 [1] NCCL INFO Channel 00/0 : 45[46000] -> 46[85000] via P2P/IPC +gpub053:2037083:2037161 [1] NCCL INFO Channel 01/0 : 45[46000] -> 46[85000] via P2P/IPC +gpub053:2037083:2037161 [1] NCCL INFO Connected all rings +gpub053:2037083:2037161 [1] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [send] via NET/IB/0 +gpub053:2037083:2037161 [1] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [receive] via NET/IB/0 +gpub053:2037083:2037161 [1] NCCL INFO Channel 00/0 : 45[46000] -> 44[7000] via P2P/IPC +gpub053:2037083:2037161 [1] NCCL INFO Channel 01/0 : 45[46000] -> 44[7000] via P2P/IPC +gpub053:2037083:2037161 [1] NCCL INFO Connected all trees +gpub053:2037083:2037161 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub053:2037083:2037161 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub053:2037083:2037161 [1] NCCL INFO comm 0x4f89c530 rank 45 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub008:2789795:2789795 [2] NCCL INFO cudaDriverVersion 12010 +gpub008:2789795:2789795 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.108<0> +gpub008:2789795:2789795 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub008:2789795:2789874 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.108<0> +gpub008:2789795:2789874 [2] NCCL INFO Using network IB +gpub008:2789795:2789874 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub008:2789795:2789874 [2] NCCL INFO Trees [0] 7/-1/-1->6->5 [1] 7/-1/-1->6->5 +gpub008:2789795:2789874 [2] NCCL INFO Channel 00/0 : 6[85000] -> 7[c7000] via P2P/IPC +gpub008:2789795:2789874 [2] NCCL INFO Channel 01/0 : 6[85000] -> 7[c7000] via P2P/IPC +gpub008:2789795:2789874 [2] NCCL INFO Connected all rings +gpub008:2789795:2789874 [2] NCCL INFO Channel 00/0 : 6[85000] -> 5[46000] via P2P/IPC +gpub008:2789795:2789874 [2] NCCL INFO 
Channel 01/0 : 6[85000] -> 5[46000] via P2P/IPC +gpub008:2789795:2789874 [2] NCCL INFO Connected all trees +gpub008:2789795:2789874 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub008:2789795:2789874 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub008:2789795:2789874 [2] NCCL INFO comm 0xb7cc7790 rank 6 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub078:387636:387636 [3] NCCL INFO cudaDriverVersion 12010 +gpub078:387636:387636 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.178<0> +gpub078:387636:387636 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub078:387636:387711 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.178<0> +gpub078:387636:387711 [3] NCCL INFO Using network IB +gpub078:387636:387711 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub078:387636:387711 [3] NCCL INFO Trees [0] -1/-1/-1->59->58 [1] -1/-1/-1->59->58 +gpub078:387636:387711 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0 +gpub078:387636:387711 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0 +gpub078:387636:387711 [3] NCCL INFO Connected all rings +gpub078:387636:387711 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 58[85000] via P2P/IPC +gpub078:387636:387711 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 58[85000] via P2P/IPC +gpub078:387636:387711 [3] NCCL INFO Connected all trees +gpub078:387636:387711 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub078:387636:387711 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub078:387636:387711 [3] NCCL INFO comm 0x50bf4280 rank 59 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub073:748599:748599 [2] NCCL INFO cudaDriverVersion 12010 +gpub073:748599:748599 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.173<0> +gpub073:748599:748599 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub073:748599:748672 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.173<0> +gpub073:748599:748672 [2] NCCL INFO Using network IB +gpub073:748599:748672 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub073:748599:748672 [2] NCCL INFO Trees [0] 51/-1/-1->50->49 [1] 51/-1/-1->50->49 +gpub073:748599:748672 [2] NCCL INFO Channel 00/0 : 50[85000] -> 51[c7000] via P2P/IPC +gpub073:748599:748672 [2] NCCL INFO Channel 01/0 : 50[85000] -> 51[c7000] via P2P/IPC +gpub073:748599:748672 [2] NCCL INFO Connected all rings +gpub073:748599:748672 [2] NCCL INFO Channel 00/0 : 50[85000] -> 49[46000] via P2P/IPC +gpub073:748599:748672 [2] NCCL INFO Channel 01/0 : 50[85000] -> 49[46000] via P2P/IPC +gpub073:748599:748672 [2] NCCL INFO Connected all trees +gpub073:748599:748672 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub073:748599:748672 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub073:748599:748672 [2] NCCL INFO comm 0xa2d1650 rank 50 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub053:2037084:2037084 [2] NCCL INFO cudaDriverVersion 12010 +gpub053:2037084:2037084 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.153<0> +gpub053:2037084:2037084 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub053:2037084:2037163 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.153<0> +gpub053:2037084:2037163 [2] NCCL INFO Using network IB +gpub053:2037084:2037163 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 
+gpub053:2037084:2037163 [2] NCCL INFO Trees [0] 47/-1/-1->46->45 [1] 47/-1/-1->46->45 +gpub053:2037084:2037163 [2] NCCL INFO Channel 00/0 : 46[85000] -> 47[c7000] via P2P/IPC +gpub053:2037084:2037163 [2] NCCL INFO Channel 01/0 : 46[85000] -> 47[c7000] via P2P/IPC +gpub053:2037084:2037163 [2] NCCL INFO Connected all rings +gpub053:2037084:2037163 [2] NCCL INFO Channel 00/0 : 46[85000] -> 45[46000] via P2P/IPC +gpub053:2037084:2037163 [2] NCCL INFO Channel 01/0 : 46[85000] -> 45[46000] via P2P/IPC +gpub053:2037084:2037163 [2] NCCL INFO Connected all trees +gpub053:2037084:2037163 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub053:2037084:2037163 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub053:2037084:2037163 [2] NCCL INFO comm 0x8c08e1a0 rank 46 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub078:387635:387635 [2] NCCL INFO cudaDriverVersion 12010 +gpub078:387635:387635 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.178<0> +gpub078:387635:387635 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub078:387635:387713 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.178<0> +gpub078:387635:387713 [2] NCCL INFO Using network IB +gpub078:387635:387713 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub078:387635:387713 [2] NCCL INFO Trees [0] 59/-1/-1->58->57 [1] 59/-1/-1->58->57 +gpub078:387635:387713 [2] NCCL INFO Channel 00/0 : 58[85000] -> 59[c7000] via P2P/IPC +gpub078:387635:387713 [2] NCCL INFO Channel 01/0 : 58[85000] -> 59[c7000] via P2P/IPC +gpub078:387635:387713 [2] NCCL INFO Connected all rings +gpub078:387635:387713 [2] NCCL INFO Channel 00/0 : 58[85000] -> 57[46000] via P2P/IPC +gpub078:387635:387713 [2] NCCL INFO Channel 01/0 : 58[85000] -> 57[46000] via P2P/IPC +gpub078:387635:387713 [2] NCCL INFO Connected all trees +gpub078:387635:387713 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub078:387635:387713 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub078:387635:387713 [2] NCCL INFO comm 0x9a633940 rank 58 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub028:3104070:3104070 [3] NCCL INFO cudaDriverVersion 12010 +gpub028:3104070:3104070 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.128<0> +gpub028:3104070:3104070 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub028:3104070:3104149 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.128<0> +gpub028:3104070:3104149 [3] NCCL INFO Using network IB +gpub028:3104070:3104149 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub028:3104070:3104149 [3] NCCL INFO Trees [0] -1/-1/-1->27->26 [1] -1/-1/-1->27->26 +gpub028:3104070:3104149 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0 +gpub028:3104070:3104149 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0 +gpub028:3104070:3104149 [3] NCCL INFO Connected all rings +gpub028:3104070:3104149 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 26[85000] via P2P/IPC +gpub028:3104070:3104149 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 26[85000] via P2P/IPC +gpub028:3104070:3104149 [3] NCCL INFO Connected all trees +gpub028:3104070:3104149 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub028:3104070:3104149 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub028:3104070:3104149 [3] NCCL INFO comm 0xb81c6b50 rank 27 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub053:2037085:2037085 
[3] NCCL INFO cudaDriverVersion 12010 +gpub053:2037085:2037085 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.153<0> +gpub053:2037085:2037085 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub053:2037085:2037162 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.153<0> +gpub053:2037085:2037162 [3] NCCL INFO Using network IB +gpub053:2037085:2037162 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub053:2037085:2037162 [3] NCCL INFO Trees [0] -1/-1/-1->47->46 [1] -1/-1/-1->47->46 +gpub053:2037085:2037162 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0 +gpub053:2037085:2037162 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0 +gpub053:2037085:2037162 [3] NCCL INFO Connected all rings +gpub053:2037085:2037162 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 46[85000] via P2P/IPC +gpub053:2037085:2037162 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 46[85000] via P2P/IPC +gpub053:2037085:2037162 [3] NCCL INFO Connected all trees +gpub053:2037085:2037162 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub053:2037085:2037162 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub053:2037085:2037162 [3] NCCL INFO comm 0x5026aaa0 rank 47 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub050:2539553:2539553 [1] NCCL INFO cudaDriverVersion 12010 +gpub050:2539553:2539553 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.150<0> +gpub050:2539553:2539553 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub050:2539553:2539629 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.150<0> +gpub050:2539553:2539629 [1] NCCL INFO Using network IB +gpub050:2539553:2539629 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub050:2539553:2539629 [1] NCCL INFO Trees [0] 34/16/-1->33->32 [1] 34/-1/-1->33->32 +gpub050:2539553:2539629 [1] NCCL INFO Channel 00/0 : 33[46000] -> 34[85000] via P2P/IPC +gpub050:2539553:2539629 [1] NCCL INFO Channel 01/0 : 33[46000] -> 34[85000] via P2P/IPC +gpub050:2539553:2539629 [1] NCCL INFO Connected all rings +gpub050:2539553:2539629 [1] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [receive] via NET/IB/0 +gpub050:2539553:2539629 [1] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [send] via NET/IB/0 +gpub050:2539553:2539629 [1] NCCL INFO Channel 00/0 : 33[46000] -> 32[7000] via P2P/IPC +gpub050:2539553:2539629 [1] NCCL INFO Channel 01/0 : 33[46000] -> 32[7000] via P2P/IPC +gpub050:2539553:2539629 [1] NCCL INFO Connected all trees +gpub050:2539553:2539629 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub050:2539553:2539629 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub050:2539553:2539629 [1] NCCL INFO comm 0xa4859b0 rank 33 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub027:3834397:3834397 [1] NCCL INFO cudaDriverVersion 12010 +gpub027:3834397:3834397 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.127<0> +gpub027:3834397:3834397 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub027:3834397:3834475 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.127<0> +gpub027:3834397:3834475 [1] NCCL INFO Using network IB +gpub027:3834397:3834475 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub027:3834397:3834475 [1] NCCL INFO Trees [0] 22/-1/-1->21->20 [1] 22/24/-1->21->20 +gpub027:3834397:3834475 [1] NCCL INFO Channel 00/0 : 21[46000] -> 22[85000] via 
P2P/IPC +gpub027:3834397:3834475 [1] NCCL INFO Channel 01/0 : 21[46000] -> 22[85000] via P2P/IPC +gpub027:3834397:3834475 [1] NCCL INFO Connected all rings +gpub027:3834397:3834475 [1] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [send] via NET/IB/0 +gpub027:3834397:3834475 [1] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [receive] via NET/IB/0 +gpub027:3834397:3834475 [1] NCCL INFO Channel 00/0 : 21[46000] -> 20[7000] via P2P/IPC +gpub027:3834397:3834475 [1] NCCL INFO Channel 01/0 : 21[46000] -> 20[7000] via P2P/IPC +gpub027:3834397:3834475 [1] NCCL INFO Connected all trees +gpub027:3834397:3834475 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub027:3834397:3834475 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub027:3834397:3834475 [1] NCCL INFO comm 0x8ed34290 rank 21 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub028:3104068:3104068 [1] NCCL INFO cudaDriverVersion 12010 +gpub028:3104068:3104068 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.128<0> +gpub028:3104068:3104068 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub028:3104068:3104151 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.128<0> +gpub028:3104068:3104151 [1] NCCL INFO Using network IB +gpub028:3104068:3104151 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub028:3104068:3104151 [1] NCCL INFO Trees [0] 26/20/-1->25->24 [1] 26/-1/-1->25->24 +gpub028:3104068:3104151 [1] NCCL INFO Channel 00/0 : 25[46000] -> 26[85000] via P2P/IPC +gpub028:3104068:3104151 [1] NCCL INFO Channel 01/0 : 25[46000] -> 26[85000] via P2P/IPC +gpub028:3104068:3104151 [1] NCCL INFO Connected all rings +gpub028:3104068:3104151 [1] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [receive] via NET/IB/0 +gpub028:3104068:3104151 [1] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [send] via NET/IB/0 +gpub028:3104068:3104151 [1] NCCL INFO Channel 00/0 : 25[46000] -> 24[7000] via P2P/IPC +gpub028:3104068:3104151 [1] NCCL INFO Channel 01/0 : 25[46000] -> 24[7000] via P2P/IPC +gpub028:3104068:3104151 [1] NCCL INFO Connected all trees +gpub028:3104068:3104151 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub028:3104068:3104151 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub028:3104068:3104151 [1] NCCL INFO comm 0xb8c85b80 rank 25 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub052:2277064:2277064 [2] NCCL INFO cudaDriverVersion 12010 +gpub052:2277064:2277064 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.152<0> +gpub052:2277064:2277064 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub052:2277064:2277141 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.152<0> +gpub052:2277064:2277141 [2] NCCL INFO Using network IB +gpub052:2277064:2277141 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub052:2277064:2277141 [2] NCCL INFO Trees [0] 43/-1/-1->42->41 [1] 43/-1/-1->42->41 +gpub052:2277064:2277141 [2] NCCL INFO Channel 00/0 : 42[85000] -> 43[c7000] via P2P/IPC +gpub052:2277064:2277141 [2] NCCL INFO Channel 01/0 : 42[85000] -> 43[c7000] via P2P/IPC +gpub052:2277064:2277141 [2] NCCL INFO Connected all rings +gpub052:2277064:2277141 [2] NCCL INFO Channel 00/0 : 42[85000] -> 41[46000] via P2P/IPC +gpub052:2277064:2277141 [2] NCCL INFO Channel 01/0 : 42[85000] -> 41[46000] via P2P/IPC +gpub052:2277064:2277141 [2] NCCL INFO Connected all trees +gpub052:2277064:2277141 [2] NCCL INFO threadThresholds 8/8/64 | 
512/8/64 | 512 | 512 +gpub052:2277064:2277141 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub052:2277064:2277141 [2] NCCL INFO comm 0xa4d0c250 rank 42 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub078:387634:387634 [1] NCCL INFO cudaDriverVersion 12010 +gpub078:387634:387634 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.178<0> +gpub078:387634:387634 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub078:387634:387712 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.178<0> +gpub078:387634:387712 [1] NCCL INFO Using network IB +gpub078:387634:387712 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub078:387634:387712 [1] NCCL INFO Trees [0] 58/52/-1->57->56 [1] 58/-1/-1->57->56 +gpub078:387634:387712 [1] NCCL INFO Channel 00/0 : 57[46000] -> 58[85000] via P2P/IPC +gpub078:387634:387712 [1] NCCL INFO Channel 01/0 : 57[46000] -> 58[85000] via P2P/IPC +gpub078:387634:387712 [1] NCCL INFO Connected all rings +gpub078:387634:387712 [1] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [receive] via NET/IB/0 +gpub078:387634:387712 [1] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [send] via NET/IB/0 +gpub078:387634:387712 [1] NCCL INFO Channel 00/0 : 57[46000] -> 56[7000] via P2P/IPC +gpub078:387634:387712 [1] NCCL INFO Channel 01/0 : 57[46000] -> 56[7000] via P2P/IPC +gpub078:387634:387712 [1] NCCL INFO Connected all trees +gpub078:387634:387712 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub078:387634:387712 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub078:387634:387712 [1] NCCL INFO comm 0xb893bfd0 rank 57 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub051:3225329:3225329 [1] NCCL INFO cudaDriverVersion 12010 +gpub051:3225329:3225329 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.151<0> +gpub051:3225329:3225329 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub051:3225329:3225407 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.151<0> +gpub051:3225329:3225407 [1] NCCL INFO Using network IB +gpub051:3225329:3225407 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub051:3225329:3225407 [1] NCCL INFO Trees [0] 38/-1/-1->37->36 [1] 38/40/-1->37->36 +gpub051:3225329:3225407 [1] NCCL INFO Channel 00/0 : 37[46000] -> 38[85000] via P2P/IPC +gpub051:3225329:3225407 [1] NCCL INFO Channel 01/0 : 37[46000] -> 38[85000] via P2P/IPC +gpub051:3225329:3225407 [1] NCCL INFO Connected all rings +gpub051:3225329:3225407 [1] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [send] via NET/IB/0 +gpub051:3225329:3225407 [1] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [receive] via NET/IB/0 +gpub084:95630:95630 [1] NCCL INFO cudaDriverVersion 12010 +gpub084:95630:95630 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.184<0> +gpub084:95630:95630 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub084:95630:95713 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.184<0> +gpub084:95630:95713 [1] NCCL INFO Using network IB +gpub084:95630:95713 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub084:95630:95713 [1] NCCL INFO Trees [0] 62/-1/-1->61->60 [1] 62/-1/-1->61->60 +gpub084:95630:95713 [1] NCCL INFO Channel 00/0 : 61[46000] -> 62[85000] via P2P/IPC +gpub084:95630:95713 [1] NCCL INFO Channel 01/0 : 61[46000] -> 62[85000] via P2P/IPC +gpub084:95630:95713 [1] NCCL INFO Connected all rings 
+gpub084:95630:95713 [1] NCCL INFO Channel 00/0 : 61[46000] -> 60[7000] via P2P/IPC +gpub084:95630:95713 [1] NCCL INFO Channel 01/0 : 61[46000] -> 60[7000] via P2P/IPC +gpub084:95630:95713 [1] NCCL INFO Connected all trees +gpub051:3225329:3225407 [1] NCCL INFO Channel 00/0 : 37[46000] -> 36[7000] via P2P/IPC +gpub051:3225329:3225407 [1] NCCL INFO Channel 01/0 : 37[46000] -> 36[7000] via P2P/IPC +gpub051:3225329:3225407 [1] NCCL INFO Connected all trees +gpub051:3225329:3225407 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub051:3225329:3225407 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub051:3225329:3225407 [1] NCCL INFO comm 0xa2b18990 rank 37 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub084:95630:95713 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub084:95630:95713 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub084:95630:95713 [1] NCCL INFO comm 0x505266b0 rank 61 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub030:2867871:2867871 [2] NCCL INFO cudaDriverVersion 12010 +gpub030:2867871:2867871 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.130<0> +gpub030:2867871:2867871 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub030:2867871:2867947 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.130<0> +gpub030:2867871:2867947 [2] NCCL INFO Using network IB +gpub030:2867871:2867947 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub030:2867871:2867947 [2] NCCL INFO Trees [0] 31/-1/-1->30->29 [1] 31/-1/-1->30->29 +gpub030:2867871:2867947 [2] NCCL INFO Channel 00/0 : 30[85000] -> 31[c7000] via P2P/IPC +gpub030:2867871:2867947 [2] NCCL INFO Channel 01/0 : 30[85000] -> 31[c7000] via P2P/IPC +gpub030:2867871:2867947 [2] NCCL INFO Connected all rings +gpub030:2867871:2867947 [2] NCCL INFO Channel 00/0 : 30[85000] -> 29[46000] via P2P/IPC +gpub030:2867871:2867947 [2] NCCL INFO Channel 01/0 : 30[85000] -> 29[46000] via P2P/IPC +gpub002:2108202:2108202 [3] NCCL INFO cudaDriverVersion 12010 +gpub002:2108202:2108202 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.102<0> +gpub002:2108202:2108202 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub002:2108202:2108274 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.102<0> +gpub002:2108202:2108274 [3] NCCL INFO Using network IB +gpub002:2108202:2108274 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub002:2108202:2108274 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2 +gpub002:2108202:2108274 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0 +gpub002:2108202:2108274 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0 +gpub002:2108202:2108274 [3] NCCL INFO Connected all rings +gpub002:2108202:2108274 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 2[85000] via P2P/IPC +gpub002:2108202:2108274 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 2[85000] via P2P/IPC +gpub030:2867871:2867947 [2] NCCL INFO Connected all trees +gpub030:2867871:2867947 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub030:2867871:2867947 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub030:2867871:2867947 [2] NCCL INFO comm 0x516c8220 rank 30 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub002:2108202:2108274 [3] NCCL INFO Connected all trees +gpub002:2108202:2108274 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub002:2108202:2108274 [3] NCCL 
INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub002:2108202:2108274 [3] NCCL INFO comm 0xba66c350 rank 3 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub028:3104069:3104069 [2] NCCL INFO cudaDriverVersion 12010 +gpub028:3104069:3104069 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.128<0> +gpub028:3104069:3104069 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub028:3104069:3104150 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.128<0> +gpub028:3104069:3104150 [2] NCCL INFO Using network IB +gpub028:3104069:3104150 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub028:3104069:3104150 [2] NCCL INFO Trees [0] 27/-1/-1->26->25 [1] 27/-1/-1->26->25 +gpub028:3104069:3104150 [2] NCCL INFO Channel 00/0 : 26[85000] -> 27[c7000] via P2P/IPC +gpub028:3104069:3104150 [2] NCCL INFO Channel 01/0 : 26[85000] -> 27[c7000] via P2P/IPC +gpub028:3104069:3104150 [2] NCCL INFO Connected all rings +gpub028:3104069:3104150 [2] NCCL INFO Channel 00/0 : 26[85000] -> 25[46000] via P2P/IPC +gpub028:3104069:3104150 [2] NCCL INFO Channel 01/0 : 26[85000] -> 25[46000] via P2P/IPC +gpub028:3104069:3104150 [2] NCCL INFO Connected all trees +gpub028:3104069:3104150 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub028:3104069:3104150 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub028:3104069:3104150 [2] NCCL INFO comm 0x50c3cd20 rank 26 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub084:95629:95629 [0] NCCL INFO cudaDriverVersion 12010 +gpub084:95629:95629 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.184<0> +gpub084:95629:95629 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub084:95629:95715 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.184<0> +gpub084:95629:95715 [0] NCCL INFO Using network IB +gpub084:95629:95715 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub084:95629:95715 [0] NCCL INFO Trees [0] 61/-1/-1->60->56 [1] 61/28/-1->60->-1 +gpub084:95629:95715 [0] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0 +gpub084:95629:95715 [0] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0 +gpub084:95629:95715 [0] NCCL INFO Channel 00/0 : 60[7000] -> 61[46000] via P2P/IPC +gpub084:95629:95715 [0] NCCL INFO Channel 01/0 : 60[7000] -> 61[46000] via P2P/IPC +gpub084:95629:95715 [0] NCCL INFO Connected all rings +gpub084:95629:95715 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [receive] via NET/IB/0 +gpub084:95629:95715 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [receive] via NET/IB/0 +gpub084:95629:95715 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [send] via NET/IB/0 +gpub084:95629:95715 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [send] via NET/IB/0 +gpub084:95629:95715 [0] NCCL INFO Connected all trees +gpub084:95629:95715 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub084:95629:95715 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub084:95629:95715 [0] NCCL INFO comm 0x4f579950 rank 60 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub050:2539555:2539555 [3] NCCL INFO cudaDriverVersion 12010 +gpub050:2539555:2539555 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.150<0> +gpub050:2539555:2539555 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub050:2539555:2539630 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB 
eth1:172.28.23.150<0> +gpub050:2539555:2539630 [3] NCCL INFO Using network IB +gpub050:2539555:2539630 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub050:2539555:2539630 [3] NCCL INFO Trees [0] -1/-1/-1->35->34 [1] -1/-1/-1->35->34 +gpub050:2539555:2539630 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0 +gpub050:2539555:2539630 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0 +gpub050:2539555:2539630 [3] NCCL INFO Connected all rings +gpub050:2539555:2539630 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 34[85000] via P2P/IPC +gpub050:2539555:2539630 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 34[85000] via P2P/IPC +gpub050:2539555:2539630 [3] NCCL INFO Connected all trees +gpub050:2539555:2539630 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub050:2539555:2539630 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub050:2539555:2539630 [3] NCCL INFO comm 0xb939ca50 rank 35 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub052:2277062:2277062 [0] NCCL INFO cudaDriverVersion 12010 +gpub052:2277062:2277062 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.152<0> +gpub052:2277062:2277062 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub052:2277062:2277138 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.152<0> +gpub052:2277062:2277138 [0] NCCL INFO Using network IB +gpub052:2277062:2277138 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub052:2277062:2277138 [0] NCCL INFO Trees [0] 41/44/-1->40->49 [1] 41/-1/-1->40->37 +gpub052:2277062:2277138 [0] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0 +gpub052:2277062:2277138 [0] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0 +gpub052:2277062:2277138 [0] NCCL INFO Channel 00/0 : 40[7000] -> 41[46000] via P2P/IPC +gpub052:2277062:2277138 [0] NCCL INFO Channel 01/0 : 40[7000] -> 41[46000] via P2P/IPC +gpub052:2277062:2277138 [0] NCCL INFO Connected all rings +gpub052:2277062:2277138 [0] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [receive] via NET/IB/0 +gpub052:2277062:2277138 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [send] via NET/IB/0 +gpub052:2277062:2277138 [0] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [send] via NET/IB/0 +gpub052:2277062:2277138 [0] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [receive] via NET/IB/0 +gpub052:2277062:2277138 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [receive] via NET/IB/0 +gpub052:2277062:2277138 [0] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [send] via NET/IB/0 +gpub052:2277062:2277138 [0] NCCL INFO Connected all trees +gpub052:2277062:2277138 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub052:2277062:2277138 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub052:2277062:2277138 [0] NCCL INFO comm 0x8b3e450 rank 40 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub010:1746408:1746408 [1] NCCL INFO cudaDriverVersion 12010 +gpub010:1746408:1746408 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.110<0> +gpub010:1746408:1746408 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub010:1746408:1746484 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.110<0> +gpub010:1746408:1746484 [1] NCCL INFO Using network IB +gpub010:1746408:1746484 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub010:1746408:1746484 [1] NCCL INFO Trees [0] 10/4/-1->9->8 [1] 
10/-1/-1->9->8 +gpub010:1746408:1746484 [1] NCCL INFO Channel 00/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub010:1746408:1746484 [1] NCCL INFO Channel 01/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub010:1746408:1746484 [1] NCCL INFO Connected all rings +gpub010:1746408:1746484 [1] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [receive] via NET/IB/0 +gpub010:1746408:1746484 [1] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [send] via NET/IB/0 +gpub010:1746408:1746484 [1] NCCL INFO Channel 00/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub010:1746408:1746484 [1] NCCL INFO Channel 01/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub010:1746408:1746484 [1] NCCL INFO Connected all trees +gpub010:1746408:1746484 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub010:1746408:1746484 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub010:1746408:1746484 [1] NCCL INFO comm 0xab889a50 rank 9 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub010:1746409:1746409 [2] NCCL INFO cudaDriverVersion 12010 +gpub010:1746409:1746409 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.110<0> +gpub010:1746409:1746409 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub010:1746409:1746487 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.110<0> +gpub010:1746409:1746487 [2] NCCL INFO Using network IB +gpub010:1746409:1746487 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub010:1746409:1746487 [2] NCCL INFO Trees [0] 11/-1/-1->10->9 [1] 11/-1/-1->10->9 +gpub010:1746409:1746487 [2] NCCL INFO Channel 00/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub010:1746409:1746487 [2] NCCL INFO Channel 01/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub010:1746409:1746487 [2] NCCL INFO Connected all rings +gpub010:1746409:1746487 [2] NCCL INFO Channel 00/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub010:1746409:1746487 [2] NCCL INFO Channel 01/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub010:1746409:1746487 [2] NCCL INFO Connected all trees +gpub010:1746409:1746487 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub010:1746409:1746487 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub010:1746409:1746487 [2] NCCL INFO comm 0x8d5443e0 rank 10 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub002:2108200:2108200 [1] NCCL INFO cudaDriverVersion 12010 +gpub002:2108200:2108200 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.102<0> +gpub002:2108200:2108200 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub002:2108200:2108275 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.102<0> +gpub002:2108200:2108275 [1] NCCL INFO Using network IB +gpub002:2108200:2108275 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub002:2108200:2108275 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0 +gpub002:2108200:2108275 [1] NCCL INFO Channel 00/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub002:2108200:2108275 [1] NCCL INFO Channel 01/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub002:2108200:2108275 [1] NCCL INFO Connected all rings +gpub002:2108200:2108275 [1] NCCL INFO Channel 00/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub002:2108200:2108275 [1] NCCL INFO Channel 01/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub002:2108200:2108275 [1] NCCL INFO Connected all trees +gpub002:2108200:2108275 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub002:2108200:2108275 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer 
+gpub002:2108200:2108275 [1] NCCL INFO comm 0x8e8ce8d0 rank 1 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub002:2108199:2108273 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.102<0> +gpub002:2108199:2108273 [0] NCCL INFO Using network IB +gpub002:2108199:2108273 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub002:2108199:2108273 [0] NCCL INFO Channel 00/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpub002:2108199:2108273 [0] NCCL INFO Channel 01/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpub002:2108199:2108273 [0] NCCL INFO Trees [0] 1/32/-1->0->-1 [1] 1/-1/-1->0->4 +gpub002:2108199:2108273 [0] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0 +gpub002:2108199:2108273 [0] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0 +gpub002:2108199:2108273 [0] NCCL INFO Channel 00/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub002:2108199:2108273 [0] NCCL INFO Channel 01/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub002:2108199:2108273 [0] NCCL INFO Connected all rings +gpub002:2108199:2108273 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [send] via NET/IB/0 +gpub002:2108199:2108273 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [receive] via NET/IB/0 +gpub002:2108199:2108273 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [send] via NET/IB/0 +gpub002:2108199:2108273 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [receive] via NET/IB/0 +gpub002:2108199:2108273 [0] NCCL INFO Connected all trees +gpub002:2108199:2108273 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub002:2108199:2108273 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub002:2108199:2108273 [0] NCCL INFO comm 0x8d0b120 rank 0 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub073:748600:748600 [3] NCCL INFO cudaDriverVersion 12010 +gpub073:748600:748600 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.173<0> +gpub073:748600:748600 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub073:748600:748671 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.173<0> +gpub073:748600:748671 [3] NCCL INFO Using network IB +gpub073:748600:748671 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub073:748600:748671 [3] NCCL INFO Trees [0] -1/-1/-1->51->50 [1] -1/-1/-1->51->50 +gpub073:748600:748671 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0 +gpub073:748600:748671 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0 +gpub073:748600:748671 [3] NCCL INFO Connected all rings +gpub073:748600:748671 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 50[85000] via P2P/IPC +gpub073:748600:748671 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 50[85000] via P2P/IPC +gpub051:3225328:3225328 [0] NCCL INFO cudaDriverVersion 12010 +gpub051:3225328:3225328 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.151<0> +gpub051:3225328:3225328 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub051:3225328:3225405 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.151<0> +gpub051:3225328:3225405 [0] NCCL INFO Using network IB +gpub051:3225328:3225405 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub051:3225328:3225405 [0] NCCL INFO Trees [0] 37/-1/-1->36->41 [1] 37/32/-1->36->44 +gpub051:3225328:3225405 [0] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0 +gpub051:3225328:3225405 [0] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] 
[receive] via NET/IB/0 +gpub051:3225328:3225405 [0] NCCL INFO Channel 00/0 : 36[7000] -> 37[46000] via P2P/IPC +gpub051:3225328:3225405 [0] NCCL INFO Channel 01/0 : 36[7000] -> 37[46000] via P2P/IPC +gpub051:3225328:3225405 [0] NCCL INFO Connected all rings +gpub073:748600:748671 [3] NCCL INFO Connected all trees +gpub073:748600:748671 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub073:748600:748671 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub073:748600:748671 [3] NCCL INFO comm 0x4f8ebf60 rank 51 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub051:3225328:3225405 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [receive] via NET/IB/0 +gpub051:3225328:3225405 [0] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [send] via NET/IB/0 +gpub051:3225328:3225405 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [send] via NET/IB/0 +gpub051:3225328:3225405 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [receive] via NET/IB/0 +gpub051:3225328:3225405 [0] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [receive] via NET/IB/0 +gpub051:3225328:3225405 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [send] via NET/IB/0 +gpub051:3225328:3225405 [0] NCCL INFO Connected all trees +gpub051:3225328:3225405 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub051:3225328:3225405 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub051:3225328:3225405 [0] NCCL INFO comm 0x4f680190 rank 36 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub050:2539554:2539554 [2] NCCL INFO cudaDriverVersion 12010 +gpub050:2539554:2539554 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.150<0> +gpub050:2539554:2539554 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub050:2539554:2539627 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.150<0> +gpub050:2539554:2539627 [2] NCCL INFO Using network IB +gpub050:2539554:2539627 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub050:2539554:2539627 [2] NCCL INFO Trees [0] 35/-1/-1->34->33 [1] 35/-1/-1->34->33 +gpub050:2539554:2539627 [2] NCCL INFO Channel 00/0 : 34[85000] -> 35[c7000] via P2P/IPC +gpub050:2539554:2539627 [2] NCCL INFO Channel 01/0 : 34[85000] -> 35[c7000] via P2P/IPC +gpub050:2539554:2539627 [2] NCCL INFO Connected all rings +gpub050:2539554:2539627 [2] NCCL INFO Channel 00/0 : 34[85000] -> 33[46000] via P2P/IPC +gpub050:2539554:2539627 [2] NCCL INFO Channel 01/0 : 34[85000] -> 33[46000] via P2P/IPC +gpub050:2539554:2539627 [2] NCCL INFO Connected all trees +gpub050:2539554:2539627 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub050:2539554:2539627 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub050:2539554:2539627 [2] NCCL INFO comm 0xa469b710 rank 34 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub027:3834398:3834398 [2] NCCL INFO cudaDriverVersion 12010 +gpub027:3834398:3834398 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.127<0> +gpub027:3834398:3834398 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub027:3834398:3834473 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.127<0> +gpub027:3834398:3834473 [2] NCCL INFO Using network IB +gpub027:3834398:3834473 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub027:3834398:3834473 [2] NCCL INFO Trees [0] 23/-1/-1->22->21 [1] 23/-1/-1->22->21 +gpub027:3834398:3834473 [2] NCCL INFO Channel 00/0 : 22[85000] -> 23[c7000] via P2P/IPC 
+gpub027:3834398:3834473 [2] NCCL INFO Channel 01/0 : 22[85000] -> 23[c7000] via P2P/IPC +gpub027:3834398:3834473 [2] NCCL INFO Connected all rings +gpub027:3834398:3834473 [2] NCCL INFO Channel 00/0 : 22[85000] -> 21[46000] via P2P/IPC +gpub027:3834398:3834473 [2] NCCL INFO Channel 01/0 : 22[85000] -> 21[46000] via P2P/IPC +gpub027:3834398:3834473 [2] NCCL INFO Connected all trees +gpub027:3834398:3834473 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub027:3834398:3834473 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub027:3834398:3834473 [2] NCCL INFO comm 0x505e2640 rank 22 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub073:748598:748598 [1] NCCL INFO cudaDriverVersion 12010 +gpub073:748598:748598 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.173<0> +gpub073:748598:748598 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub073:748598:748673 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.173<0> +gpub073:748598:748673 [1] NCCL INFO Using network IB +gpub073:748598:748673 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub073:748598:748673 [1] NCCL INFO Trees [0] 50/40/-1->49->48 [1] 50/-1/-1->49->48 +gpub073:748598:748673 [1] NCCL INFO Channel 00/0 : 49[46000] -> 50[85000] via P2P/IPC +gpub073:748598:748673 [1] NCCL INFO Channel 01/0 : 49[46000] -> 50[85000] via P2P/IPC +gpub073:748598:748673 [1] NCCL INFO Connected all rings +gpub073:748598:748673 [1] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [receive] via NET/IB/0 +gpub073:748598:748673 [1] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [send] via NET/IB/0 +gpub073:748598:748673 [1] NCCL INFO Channel 00/0 : 49[46000] -> 48[7000] via P2P/IPC +gpub073:748598:748673 [1] NCCL INFO Channel 01/0 : 49[46000] -> 48[7000] via P2P/IPC +gpub073:748598:748673 [1] NCCL INFO Connected all trees +gpub073:748598:748673 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub073:748598:748673 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub073:748598:748673 [1] NCCL INFO comm 0xb7883d00 rank 49 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub073:748597:748597 [0] NCCL INFO cudaDriverVersion 12010 +gpub073:748597:748597 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.173<0> +gpub073:748597:748597 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub073:748597:748674 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.173<0> +gpub073:748597:748674 [0] NCCL INFO Using network IB +gpub073:748597:748674 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub073:748597:748674 [0] NCCL INFO Trees [0] 49/56/-1->48->32 [1] 49/-1/-1->48->52 +gpub073:748597:748674 [0] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0 +gpub073:748597:748674 [0] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0 +gpub073:748597:748674 [0] NCCL INFO Channel 00/0 : 48[7000] -> 49[46000] via P2P/IPC +gpub073:748597:748674 [0] NCCL INFO Channel 01/0 : 48[7000] -> 49[46000] via P2P/IPC +gpub073:748597:748674 [0] NCCL INFO Connected all rings +gpub073:748597:748674 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [send] via NET/IB/0 +gpub073:748597:748674 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [send] via NET/IB/0 +gpub073:748597:748674 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [receive] via NET/IB/0 +gpub073:748597:748674 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [send] via 
NET/IB/0 +gpub073:748597:748674 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [receive] via NET/IB/0 +gpub073:748597:748674 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [receive] via NET/IB/0 +gpub073:748597:748674 [0] NCCL INFO Connected all trees +gpub073:748597:748674 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub073:748597:748674 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub073:748597:748674 [0] NCCL INFO comm 0xa03dfc0 rank 48 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub050:2539552:2539552 [0] NCCL INFO cudaDriverVersion 12010 +gpub050:2539552:2539552 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.150<0> +gpub050:2539552:2539552 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub050:2539552:2539628 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.150<0> +gpub050:2539552:2539628 [0] NCCL INFO Using network IB +gpub050:2539552:2539628 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub050:2539552:2539628 [0] NCCL INFO Trees [0] 33/48/-1->32->0 [1] 33/-1/-1->32->36 +gpub050:2539552:2539628 [0] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0 +gpub050:2539552:2539628 [0] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0 +gpub050:2539552:2539628 [0] NCCL INFO Channel 00/0 : 32[7000] -> 33[46000] via P2P/IPC +gpub050:2539552:2539628 [0] NCCL INFO Channel 01/0 : 32[7000] -> 33[46000] via P2P/IPC +gpub050:2539552:2539628 [0] NCCL INFO Connected all rings +gpub050:2539552:2539628 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [send] via NET/IB/0 +gpub050:2539552:2539628 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [send] via NET/IB/0 +gpub050:2539552:2539628 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [receive] via NET/IB/0 +gpub050:2539552:2539628 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [send] via NET/IB/0 +gpub050:2539552:2539628 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [receive] via NET/IB/0 +gpub050:2539552:2539628 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [receive] via NET/IB/0 +gpub050:2539552:2539628 [0] NCCL INFO Connected all trees +gpub050:2539552:2539628 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub050:2539552:2539628 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub050:2539552:2539628 [0] NCCL INFO comm 0xaafdc050 rank 32 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub052:2277063:2277063 [1] NCCL INFO cudaDriverVersion 12010 +gpub052:2277063:2277063 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.152<0> +gpub052:2277063:2277063 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub052:2277063:2277140 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.152<0> +gpub052:2277063:2277140 [1] NCCL INFO Using network IB +gpub052:2277063:2277140 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub052:2277063:2277140 [1] NCCL INFO Trees [0] 42/36/-1->41->40 [1] 42/-1/-1->41->40 +gpub052:2277063:2277140 [1] NCCL INFO Channel 00/0 : 41[46000] -> 42[85000] via P2P/IPC +gpub052:2277063:2277140 [1] NCCL INFO Channel 01/0 : 41[46000] -> 42[85000] via P2P/IPC +gpub052:2277063:2277140 [1] NCCL INFO Connected all rings +gpub052:2277063:2277140 [1] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [receive] via NET/IB/0 +gpub052:2277063:2277140 [1] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [send] via NET/IB/0 +gpub052:2277063:2277140 [1] NCCL INFO 
Channel 00/0 : 41[46000] -> 40[7000] via P2P/IPC +gpub052:2277063:2277140 [1] NCCL INFO Channel 01/0 : 41[46000] -> 40[7000] via P2P/IPC +gpub052:2277063:2277140 [1] NCCL INFO Connected all trees +gpub052:2277063:2277140 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub052:2277063:2277140 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub052:2277063:2277140 [1] NCCL INFO comm 0xa865590 rank 41 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub008:2789794:2789794 [1] NCCL INFO cudaDriverVersion 12010 +gpub008:2789794:2789794 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.108<0> +gpub008:2789794:2789794 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub008:2789794:2789873 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.108<0> +gpub008:2789794:2789873 [1] NCCL INFO Using network IB +gpub008:2789794:2789873 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub008:2789794:2789873 [1] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 6/8/-1->5->4 +gpub008:2789794:2789873 [1] NCCL INFO Channel 00/0 : 5[46000] -> 6[85000] via P2P/IPC +gpub008:2789794:2789873 [1] NCCL INFO Channel 01/0 : 5[46000] -> 6[85000] via P2P/IPC +gpub008:2789794:2789873 [1] NCCL INFO Connected all rings +gpub008:2789794:2789873 [1] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [send] via NET/IB/0 +gpub008:2789794:2789873 [1] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [receive] via NET/IB/0 +gpub008:2789794:2789873 [1] NCCL INFO Channel 00/0 : 5[46000] -> 4[7000] via P2P/IPC +gpub008:2789794:2789873 [1] NCCL INFO Channel 01/0 : 5[46000] -> 4[7000] via P2P/IPC +gpub008:2789794:2789873 [1] NCCL INFO Connected all trees +gpub008:2789794:2789873 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub008:2789794:2789873 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub008:2789794:2789873 [1] NCCL INFO comm 0x8abbf8b0 rank 5 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub002:2108201:2108201 [2] NCCL INFO cudaDriverVersion 12010 +gpub002:2108201:2108201 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.102<0> +gpub002:2108201:2108201 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub002:2108201:2108276 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.102<0> +gpub002:2108201:2108276 [2] NCCL INFO Using network IB +gpub002:2108201:2108276 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub002:2108201:2108276 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1 +gpub002:2108201:2108276 [2] NCCL INFO Channel 00/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub002:2108201:2108276 [2] NCCL INFO Channel 01/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub002:2108201:2108276 [2] NCCL INFO Connected all rings +gpub002:2108201:2108276 [2] NCCL INFO Channel 00/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub002:2108201:2108276 [2] NCCL INFO Channel 01/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub002:2108201:2108276 [2] NCCL INFO Connected all trees +gpub002:2108201:2108276 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub002:2108201:2108276 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub002:2108201:2108276 [2] NCCL INFO comm 0x8ca2cb90 rank 2 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub051:3225330:3225330 [2] NCCL INFO cudaDriverVersion 12010 +gpub051:3225330:3225330 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.151<0> +gpub051:3225330:3225330 [2] NCCL INFO NET/Plugin : No plugin 
found (libnccl-net.so), using internal implementation +gpub051:3225330:3225408 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.151<0> +gpub051:3225330:3225408 [2] NCCL INFO Using network IB +gpub051:3225330:3225408 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub051:3225330:3225408 [2] NCCL INFO Trees [0] 39/-1/-1->38->37 [1] 39/-1/-1->38->37 +gpub051:3225330:3225408 [2] NCCL INFO Channel 00/0 : 38[85000] -> 39[c7000] via P2P/IPC +gpub051:3225330:3225408 [2] NCCL INFO Channel 01/0 : 38[85000] -> 39[c7000] via P2P/IPC +gpub051:3225330:3225408 [2] NCCL INFO Connected all rings +gpub051:3225330:3225408 [2] NCCL INFO Channel 00/0 : 38[85000] -> 37[46000] via P2P/IPC +gpub051:3225330:3225408 [2] NCCL INFO Channel 01/0 : 38[85000] -> 37[46000] via P2P/IPC +gpub051:3225330:3225408 [2] NCCL INFO Connected all trees +gpub051:3225330:3225408 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub051:3225330:3225408 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub051:3225330:3225408 [2] NCCL INFO comm 0x4f59a920 rank 38 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub051:3225331:3225331 [3] NCCL INFO cudaDriverVersion 12010 +gpub051:3225331:3225331 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.151<0> +gpub051:3225331:3225331 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub051:3225331:3225406 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.151<0> +gpub051:3225331:3225406 [3] NCCL INFO Using network IB +gpub051:3225331:3225406 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub051:3225331:3225406 [3] NCCL INFO Trees [0] -1/-1/-1->39->38 [1] -1/-1/-1->39->38 +gpub051:3225331:3225406 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0 +gpub051:3225331:3225406 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0 +gpub051:3225331:3225406 [3] NCCL INFO Connected all rings +gpub051:3225331:3225406 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 38[85000] via P2P/IPC +gpub051:3225331:3225406 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 38[85000] via P2P/IPC +gpub051:3225331:3225406 [3] NCCL INFO Connected all trees +gpub051:3225331:3225406 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub051:3225331:3225406 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub051:3225331:3225406 [3] NCCL INFO comm 0xb371b610 rank 39 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub052:2277065:2277065 [3] NCCL INFO cudaDriverVersion 12010 +gpub052:2277065:2277065 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.152<0> +gpub052:2277065:2277065 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub052:2277065:2277139 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.152<0> +gpub052:2277065:2277139 [3] NCCL INFO Using network IB +gpub052:2277065:2277139 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub052:2277065:2277139 [3] NCCL INFO Trees [0] -1/-1/-1->43->42 [1] -1/-1/-1->43->42 +gpub052:2277065:2277139 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0 +gpub052:2277065:2277139 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0 +gpub052:2277065:2277139 [3] NCCL INFO Connected all rings +gpub052:2277065:2277139 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 42[85000] via P2P/IPC +gpub052:2277065:2277139 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 42[85000] via P2P/IPC +gpub052:2277065:2277139 [3] NCCL INFO 
Connected all trees +gpub052:2277065:2277139 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub052:2277065:2277139 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub052:2277065:2277139 [3] NCCL INFO comm 0x8f38890 rank 43 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance.
If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. 
(function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. 
If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. 
(function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. 
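The warning above is emitted once per rank by PyTorch's DDP reducer. For reference, a minimal sketch of where the flag lives; this is generic PyTorch usage, not the ESPnet trainer code, and `wrap_model`/`gpu_id` are illustrative names:

```python
# Minimal sketch (generic PyTorch, not the ESPnet trainer itself) of the
# flag the warning refers to. find_unused_parameters=True makes DDP walk
# the autograd graph after every forward pass to look for parameters that
# did not contribute; setting it to False removes that per-iteration
# traversal, but requires every parameter to get a gradient each step.
import torch
from torch.nn.parallel import DistributedDataParallel as DDP

def wrap_model(model: torch.nn.Module, gpu_id: int) -> DDP:
    # Keep find_unused_parameters=True only if some branches of the model
    # (e.g. an optional head) can be skipped on some iterations.
    return DDP(
        model.cuda(gpu_id),
        device_ids=[gpu_id],
        find_unused_parameters=False,
    )
```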
+[gpub002:0/64] 2023-07-12 13:27:01,090 (trainer:732) INFO: 40epoch:train:1-100batch: iter_time=1.208, forward_time=0.235, loss_ctc=61.116, loss_att=44.050, acc=0.697, loss=49.170, backward_time=1.036, grad_norm=103.910, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.735e-05, train_time=8.676 +[gpub002:0/64] 2023-07-12 13:29:17,100 (trainer:732) INFO: 40epoch:train:101-200batch: iter_time=1.264e-04, forward_time=0.142, loss_ctc=72.135, loss_att=57.235, acc=0.700, loss=61.705, backward_time=1.027, grad_norm=107.394, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.734e-05, train_time=2.721 +[gpub002:0/64] 2023-07-12 13:31:32,346 (trainer:732) INFO: 40epoch:train:201-300batch: iter_time=1.291e-04, forward_time=0.142, loss_ctc=81.043, loss_att=57.325, acc=0.717, loss=64.440, backward_time=1.025, grad_norm=163.403, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.734e-05, train_time=2.705 +[gpub002:0/64] 2023-07-12 13:33:47,706 (trainer:732) INFO: 40epoch:train:301-400batch: iter_time=1.172e-04, forward_time=0.143, loss_ctc=71.829, loss_att=55.277, acc=0.697, loss=60.243, backward_time=1.026, grad_norm=110.550, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.733e-05, train_time=2.707 +[gpub002:0/64] 2023-07-12 13:36:02,885 (trainer:732) INFO: 40epoch:train:401-500batch: iter_time=1.285e-04, forward_time=0.143, loss_ctc=72.347, loss_att=53.215, acc=0.711, loss=58.954, backward_time=1.025, grad_norm=113.868, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.732e-05, train_time=2.703 +[gpub002:0/64] 2023-07-12 13:38:27,982 (trainer:732) INFO: 40epoch:train:501-600batch: iter_time=1.282e-04, forward_time=0.141, loss_ctc=68.400, loss_att=46.292, acc=0.689, loss=52.924, backward_time=1.032, grad_norm=117.473, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.731e-05, train_time=2.902 +[gpub002:0/64] 2023-07-12 13:40:47,434 (trainer:732) INFO: 40epoch:train:601-700batch: iter_time=1.231e-04, forward_time=0.142, loss_ctc=68.083, loss_att=50.252, acc=0.715, loss=55.601, backward_time=1.028, grad_norm=108.806, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.731e-05, train_time=2.789 +[gpub002:0/64] 2023-07-12 13:43:10,107 (trainer:732) INFO: 40epoch:train:701-800batch: iter_time=1.157e-04, forward_time=0.142, loss_ctc=68.676, loss_att=52.334, acc=0.698, loss=57.237, backward_time=1.024, grad_norm=108.949, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.730e-05, train_time=2.852 +[gpub002:0/64] 2023-07-12 13:44:08,310 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
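The per-100-batch `loss` in these entries is consistent with a hybrid CTC/attention objective, loss = w * loss_ctc + (1 - w) * loss_att with w = 0.3. The weight is inferred here from the logged values; the actual setting lives in the training YAML, which is not part of this log. A quick check against the first entry:

```python
# Assumes ctc_weight = 0.3 (inferred from the logged numbers above; the
# real value comes from the training config, not shown in this log).
ctc_weight = 0.3

def combined_loss(loss_ctc: float, loss_att: float) -> float:
    return ctc_weight * loss_ctc + (1.0 - ctc_weight) * loss_att

# 40epoch:train:1-100batch: loss_ctc=61.116, loss_att=44.050, loss=49.170
assert abs(combined_loss(61.116, 44.050) - 49.170) < 1e-3
```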
+[gpub002:0/64] 2023-07-12 13:44:25,786 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 13:44:29,170 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 13:44:29,170 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub002:0/64] 2023-07-12 13:44:29,176 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 13:49:29,871 (trainer:732) INFO: 40epoch:train:801-900batch: iter_time=2.128, forward_time=0.187, loss_ctc=62.142, loss_att=45.987, acc=0.699, loss=50.833, backward_time=1.043, grad_norm=127.878, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.729e-05, train_time=7.596 +[gpub002:0/64] 2023-07-12 13:51:45,925 (trainer:732) INFO: 40epoch:train:901-1000batch: iter_time=1.253e-04, forward_time=0.144, loss_ctc=73.026, loss_att=58.214, acc=0.710, loss=62.657, backward_time=1.025, grad_norm=124.501, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.728e-05, train_time=2.721 +[gpub002:0/64] 2023-07-12 13:54:01,587 (trainer:732) INFO: 40epoch:train:1001-1100batch: iter_time=1.277e-04, forward_time=0.143, loss_ctc=79.141, loss_att=57.052, acc=0.725, loss=63.679, backward_time=1.025, grad_norm=151.133, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.727e-05, train_time=2.713 +[gpub002:0/64] 2023-07-12 13:56:17,421 (trainer:732) INFO: 40epoch:train:1101-1200batch: iter_time=1.301e-04, forward_time=0.143, loss_ctc=71.202, loss_att=55.122, acc=0.705, loss=59.946, backward_time=1.026, grad_norm=97.471, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.727e-05, train_time=2.716 +[gpub002:0/64] 2023-07-12 13:58:33,114 (trainer:732) INFO: 40epoch:train:1201-1300batch: iter_time=1.240e-04, forward_time=0.143, loss_ctc=71.459, loss_att=52.133, acc=0.724, loss=57.931, backward_time=1.025, grad_norm=140.565, clip=100.000, loss_scale=5.192e+32, optim_step_time=0.179, optim0_lr0=5.726e-05, train_time=2.714 +[gpub002:0/64] 2023-07-12 14:00:48,389 (trainer:732) INFO: 40epoch:train:1301-1400batch: iter_time=1.241e-04, forward_time=0.143, loss_ctc=65.305, loss_att=45.866, acc=0.695, loss=51.698, backward_time=1.021, grad_norm=108.278, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.179, optim0_lr0=5.725e-05, train_time=2.705 +[gpub002:0/64] 2023-07-12 14:03:05,877 (trainer:732) INFO: 40epoch:train:1401-1500batch: iter_time=1.239e-04, forward_time=0.142, loss_ctc=68.643, loss_att=50.860, acc=0.714, loss=56.195, backward_time=1.025, grad_norm=114.462, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.179, optim0_lr0=5.724e-05, train_time=2.750 +[gpub002:0/64] 2023-07-12 14:05:24,000 (trainer:732) INFO: 40epoch:train:1501-1600batch: iter_time=1.178e-04, forward_time=0.143, loss_ctc=67.303, loss_att=51.300, acc=0.712, loss=56.101, backward_time=1.026, grad_norm=99.620, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.179, optim0_lr0=5.724e-05, 
train_time=2.762 +[gpub002:0/64] 2023-07-12 14:06:37,008 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub002:0/64] 2023-07-12 14:06:56,326 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub002:0/64] 2023-07-12 14:07:14,096 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 14:07:17,513 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 14:07:17,513 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub002:0/64] 2023-07-12 14:07:17,519 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 14:12:18,848 (trainer:732) INFO: 40epoch:train:1601-1700batch: iter_time=1.255, forward_time=0.143, loss_ctc=66.208, loss_att=50.001, acc=0.701, loss=54.863, backward_time=1.030, grad_norm=128.956, clip=100.000, loss_scale=4.967e+32, optim_step_time=0.179, optim0_lr0=5.723e-05, train_time=8.297 +[gpub002:0/64] 2023-07-12 14:14:35,280 (trainer:732) INFO: 40epoch:train:1701-1800batch: iter_time=1.084e-04, forward_time=0.143, loss_ctc=67.632, loss_att=51.671, acc=0.709, loss=56.460, backward_time=1.028, grad_norm=131.230, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.722e-05, train_time=2.728 +[gpub002:0/64] 2023-07-12 14:16:51,826 (trainer:732) INFO: 40epoch:train:1801-1900batch: iter_time=1.260e-04, forward_time=0.145, loss_ctc=77.856, loss_att=57.061, acc=0.718, loss=63.299, backward_time=1.031, grad_norm=127.495, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.722e-05, train_time=2.731 +[gpub002:0/64] 2023-07-12 14:19:07,735 (trainer:732) INFO: 40epoch:train:1901-2000batch: iter_time=1.083e-04, forward_time=0.143, loss_ctc=74.187, loss_att=54.012, acc=0.723, loss=60.065, backward_time=1.029, grad_norm=134.227, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.721e-05, train_time=2.718 +[gpub002:0/64] 2023-07-12 14:21:23,408 (trainer:732) INFO: 40epoch:train:2001-2100batch: iter_time=1.295e-04, forward_time=0.143, loss_ctc=71.655, loss_att=56.017, acc=0.709, loss=60.708, backward_time=1.025, grad_norm=113.356, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.720e-05, train_time=2.713 +[gpub002:0/64] 2023-07-12 14:23:38,786 (trainer:732) INFO: 40epoch:train:2101-2200batch: iter_time=1.211e-04, forward_time=0.143, loss_ctc=68.732, loss_att=48.850, acc=0.703, loss=54.815, backward_time=1.022, grad_norm=114.122, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.719e-05, train_time=2.707 +[gpub002:0/64] 2023-07-12 14:25:54,333 (trainer:732) INFO: 40epoch:train:2201-2300batch: iter_time=1.151e-04, forward_time=0.142, loss_ctc=63.319, loss_att=45.815, acc=0.718, loss=51.066, backward_time=1.024, grad_norm=112.742, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.719e-05, train_time=2.711 +[gpub002:0/64] 2023-07-12 14:28:11,996 (trainer:732) INFO: 
40epoch:train:2301-2400batch: iter_time=1.254e-04, forward_time=0.143, loss_ctc=66.195, loss_att=50.742, acc=0.715, loss=55.378, backward_time=1.023, grad_norm=102.963, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.718e-05, train_time=2.753 +[gpub002:0/64] 2023-07-12 14:30:30,321 (trainer:732) INFO: 40epoch:train:2401-2500batch: iter_time=1.265e-04, forward_time=0.143, loss_ctc=71.093, loss_att=53.758, acc=0.706, loss=58.959, backward_time=1.027, grad_norm=115.668, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.717e-05, train_time=2.766 +[gpub002:0/64] 2023-07-12 14:30:32,911 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub002:0/64] 2023-07-12 14:30:50,993 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 14:30:54,403 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 14:30:54,403 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub002:0/64] 2023-07-12 14:30:54,409 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 14:37:48,647 (trainer:732) INFO: 40epoch:train:2501-2600batch: iter_time=1.257, forward_time=0.143, loss_ctc=61.405, loss_att=46.625, acc=0.694, loss=51.059, backward_time=1.033, grad_norm=109.781, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.716e-05, train_time=8.766 +[gpub002:0/64] 2023-07-12 14:40:05,295 (trainer:732) INFO: 40epoch:train:2601-2700batch: iter_time=1.359e-04, forward_time=0.144, loss_ctc=69.964, loss_att=53.049, acc=0.717, loss=58.124, backward_time=1.027, grad_norm=135.636, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.716e-05, train_time=2.733 +[gpub002:0/64] 2023-07-12 14:42:21,035 (trainer:732) INFO: 40epoch:train:2701-2800batch: iter_time=1.229e-04, forward_time=0.144, loss_ctc=77.512, loss_att=56.118, acc=0.709, loss=62.537, backward_time=1.025, grad_norm=137.980, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.715e-05, train_time=2.715 +[gpub002:0/64] 2023-07-12 14:44:36,479 (trainer:732) INFO: 40epoch:train:2801-2900batch: iter_time=1.048e-04, forward_time=0.142, loss_ctc=71.235, loss_att=56.169, acc=0.706, loss=60.689, backward_time=1.023, grad_norm=120.061, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.714e-05, train_time=2.709 +[gpub002:0/64] 2023-07-12 14:46:51,759 (trainer:732) INFO: 40epoch:train:2901-3000batch: iter_time=1.043e-04, forward_time=0.143, loss_ctc=68.448, loss_att=51.921, acc=0.704, loss=56.879, backward_time=1.022, grad_norm=104.401, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.713e-05, train_time=2.705 +[gpub002:0/64] 2023-07-12 14:49:07,671 (trainer:732) INFO: 40epoch:train:3001-3100batch: iter_time=1.139e-04, forward_time=0.142, loss_ctc=66.825, loss_att=44.975, acc=0.706, loss=51.530, backward_time=1.021, grad_norm=103.993, clip=100.000, 
loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.713e-05, train_time=2.718 +[gpub002:0/64] 2023-07-12 14:51:28,934 (trainer:732) INFO: 40epoch:train:3101-3200batch: iter_time=1.132e-04, forward_time=0.142, loss_ctc=65.918, loss_att=48.901, acc=0.716, loss=54.006, backward_time=1.027, grad_norm=102.075, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.712e-05, train_time=2.825 +[gpub002:0/64] 2023-07-12 14:53:49,088 (trainer:732) INFO: 40epoch:train:3201-3300batch: iter_time=1.065e-04, forward_time=0.142, loss_ctc=69.493, loss_att=54.998, acc=0.698, loss=59.346, backward_time=1.045, grad_norm=117.513, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.711e-05, train_time=2.803 +[gpub002:0/64] 2023-07-12 14:54:39,670 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub002:0/64] 2023-07-12 14:54:57,543 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 14:55:01,100 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 14:55:01,100 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub002:0/64] 2023-07-12 14:55:01,106 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 15:00:02,934 (trainer:732) INFO: 40epoch:train:3301-3400batch: iter_time=1.259, forward_time=0.143, loss_ctc=63.620, loss_att=45.338, acc=0.714, loss=50.822, backward_time=1.049, grad_norm=109.621, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.710e-05, train_time=7.477 +[gpub002:0/64] 2023-07-12 15:02:20,429 (trainer:732) INFO: 40epoch:train:3401-3500batch: iter_time=1.191e-04, forward_time=0.143, loss_ctc=66.528, loss_att=49.949, acc=0.714, loss=54.922, backward_time=1.026, grad_norm=125.062, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.710e-05, train_time=2.750 +[gpub002:0/64] 2023-07-12 15:04:36,303 (trainer:732) INFO: 40epoch:train:3501-3600batch: iter_time=1.151e-04, forward_time=0.144, loss_ctc=76.092, loss_att=55.412, acc=0.724, loss=61.616, backward_time=1.026, grad_norm=122.114, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.709e-05, train_time=2.717 +[gpub002:0/64] 2023-07-12 15:06:52,169 (trainer:732) INFO: 40epoch:train:3601-3700batch: iter_time=1.145e-04, forward_time=0.144, loss_ctc=72.680, loss_att=53.035, acc=0.724, loss=58.929, backward_time=1.025, grad_norm=97.254, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.708e-05, train_time=2.717 +[gpub002:0/64] 2023-07-12 15:09:07,796 (trainer:732) INFO: 40epoch:train:3701-3800batch: iter_time=1.156e-04, forward_time=0.143, loss_ctc=69.121, loss_att=53.328, acc=0.720, loss=58.066, backward_time=1.024, grad_norm=139.481, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.707e-05, train_time=2.712 +[gpub002:0/64] 2023-07-12 15:11:23,110 (trainer:732) INFO: 40epoch:train:3801-3900batch: iter_time=1.205e-04, 
forward_time=0.143, loss_ctc=69.056, loss_att=47.903, acc=0.705, loss=54.249, backward_time=1.021, grad_norm=121.204, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.707e-05, train_time=2.706 +[gpub002:0/64] 2023-07-12 15:13:40,798 (trainer:732) INFO: 40epoch:train:3901-4000batch: iter_time=1.128e-04, forward_time=0.143, loss_ctc=65.887, loss_att=48.895, acc=0.717, loss=53.992, backward_time=1.027, grad_norm=104.050, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.706e-05, train_time=2.754 +[gpub002:0/64] 2023-07-12 15:16:02,209 (trainer:732) INFO: 40epoch:train:4001-4100batch: iter_time=1.186e-04, forward_time=0.143, loss_ctc=66.041, loss_att=51.439, acc=0.707, loss=55.819, backward_time=1.028, grad_norm=112.561, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.705e-05, train_time=2.828 +[gpub002:0/64] 2023-07-12 15:17:39,601 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub002:0/64] 2023-07-12 15:17:57,438 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 15:18:00,971 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 15:18:00,971 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub002:0/64] 2023-07-12 15:18:00,978 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 15:21:47,365 (trainer:732) INFO: 40epoch:train:4101-4200batch: iter_time=1.250, forward_time=0.144, loss_ctc=68.950, loss_att=50.746, acc=0.718, loss=56.207, backward_time=1.041, grad_norm=106.172, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.704e-05, train_time=6.903 +[gpub002:0/64] 2023-07-12 15:24:03,354 (trainer:732) INFO: 40epoch:train:4201-4300batch: iter_time=1.186e-04, forward_time=0.144, loss_ctc=62.707, loss_att=48.487, acc=0.702, loss=52.753, backward_time=1.026, grad_norm=100.763, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.704e-05, train_time=2.720 +[gpub002:0/64] 2023-07-12 15:26:19,013 (trainer:732) INFO: 40epoch:train:4301-4400batch: iter_time=1.151e-04, forward_time=0.142, loss_ctc=69.760, loss_att=52.370, acc=0.725, loss=57.587, backward_time=1.023, grad_norm=106.670, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.703e-05, train_time=2.713 +[gpub002:0/64] 2023-07-12 15:28:34,645 (trainer:732) INFO: 40epoch:train:4401-4500batch: iter_time=1.100e-04, forward_time=0.143, loss_ctc=78.815, loss_att=57.161, acc=0.718, loss=63.657, backward_time=1.024, grad_norm=126.050, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.702e-05, train_time=2.712 +[gpub002:0/64] 2023-07-12 15:30:50,128 (trainer:732) INFO: 40epoch:train:4501-4600batch: iter_time=1.185e-04, forward_time=0.143, loss_ctc=67.843, loss_att=54.104, acc=0.716, loss=58.226, backward_time=1.023, grad_norm=113.696, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.701e-05, 
train_time=2.709 +[gpub002:0/64] 2023-07-12 15:33:05,671 (trainer:732) INFO: 40epoch:train:4601-4700batch: iter_time=1.154e-04, forward_time=0.143, loss_ctc=70.374, loss_att=50.230, acc=0.723, loss=56.273, backward_time=1.023, grad_norm=118.649, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.701e-05, train_time=2.711 +[gpub002:0/64] 2023-07-12 15:35:20,996 (trainer:732) INFO: 40epoch:train:4701-4800batch: iter_time=1.161e-04, forward_time=0.143, loss_ctc=64.594, loss_att=44.965, acc=0.707, loss=50.854, backward_time=1.022, grad_norm=106.973, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.700e-05, train_time=2.706 +[gpub002:0/64] 2023-07-12 15:37:36,540 (trainer:732) INFO: 40epoch:train:4801-4900batch: iter_time=1.091e-04, forward_time=0.143, loss_ctc=67.322, loss_att=50.435, acc=0.717, loss=55.501, backward_time=1.024, grad_norm=103.806, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.699e-05, train_time=2.711 +[gpub002:0/64] 2023-07-12 15:39:52,011 (trainer:732) INFO: 40epoch:train:4901-5000batch: iter_time=1.164e-04, forward_time=0.143, loss_ctc=70.025, loss_att=54.199, acc=0.712, loss=58.947, backward_time=1.023, grad_norm=113.323, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.698e-05, train_time=2.709 +[gpub002:0/64] 2023-07-12 15:39:54,676 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub002:0/64] 2023-07-12 15:40:13,004 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 15:40:16,430 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 15:40:16,430 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub002:0/64] 2023-07-12 15:40:16,437 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 15:45:20,743 (trainer:732) INFO: 40epoch:train:5001-5100batch: iter_time=1.263, forward_time=0.180, loss_ctc=60.084, loss_att=45.552, acc=0.702, loss=49.912, backward_time=1.033, grad_norm=102.864, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.698e-05, train_time=6.574 +[gpub002:0/64] 2023-07-12 15:47:37,331 (trainer:732) INFO: 40epoch:train:5101-5200batch: iter_time=1.241e-04, forward_time=0.144, loss_ctc=70.511, loss_att=52.939, acc=0.720, loss=58.210, backward_time=1.024, grad_norm=103.107, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.697e-05, train_time=2.732 +[gpub002:0/64] 2023-07-12 15:49:58,905 (trainer:732) INFO: 40epoch:train:5201-5300batch: iter_time=1.112e-04, forward_time=0.142, loss_ctc=77.354, loss_att=56.014, acc=0.710, loss=62.416, backward_time=1.023, grad_norm=133.871, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.696e-05, train_time=2.831 +[gpub002:0/64] 2023-07-12 15:52:14,432 (trainer:732) INFO: 40epoch:train:5301-5400batch: iter_time=1.243e-04, forward_time=0.143, loss_ctc=68.506, loss_att=54.652, acc=0.711, 
loss=58.808, backward_time=1.025, grad_norm=114.787, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.695e-05, train_time=2.710 +[gpub002:0/64] 2023-07-12 15:54:29,792 (trainer:732) INFO: 40epoch:train:5401-5500batch: iter_time=1.084e-04, forward_time=0.143, loss_ctc=68.380, loss_att=50.824, acc=0.707, loss=56.091, backward_time=1.023, grad_norm=186.761, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.695e-05, train_time=2.707 +[gpub002:0/64] 2023-07-12 15:56:44,731 (trainer:732) INFO: 40epoch:train:5501-5600batch: iter_time=1.176e-04, forward_time=0.142, loss_ctc=65.993, loss_att=45.437, acc=0.706, loss=51.604, backward_time=1.019, grad_norm=104.703, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.694e-05, train_time=2.699 +[gpub002:0/64] 2023-07-12 15:58:59,893 (trainer:732) INFO: 40epoch:train:5601-5700batch: iter_time=1.276e-04, forward_time=0.143, loss_ctc=65.646, loss_att=48.820, acc=0.718, loss=53.868, backward_time=1.022, grad_norm=121.882, clip=100.000, loss_scale=4.738e+32, optim_step_time=0.180, optim0_lr0=5.693e-05, train_time=2.703 +[gpub002:0/64] 2023-07-12 16:01:15,393 (trainer:732) INFO: 40epoch:train:5701-5800batch: iter_time=1.205e-04, forward_time=0.143, loss_ctc=69.173, loss_att=55.052, acc=0.701, loss=59.288, backward_time=1.025, grad_norm=113.446, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.180, optim0_lr0=5.693e-05, train_time=2.710 +[gpub002:0/64] 2023-07-12 16:02:13,718 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub002:0/64] 2023-07-12 16:02:31,777 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 16:02:35,214 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 16:02:35,215 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub002:0/64] 2023-07-12 16:02:35,223 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 16:06:51,419 (trainer:732) INFO: 40epoch:train:5801-5900batch: iter_time=1.911, forward_time=0.144, loss_ctc=61.992, loss_att=46.321, acc=0.712, loss=51.022, backward_time=1.033, grad_norm=119.027, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.180, optim0_lr0=5.692e-05, train_time=6.720 +[gpub002:0/64] 2023-07-12 16:09:07,218 (trainer:732) INFO: 40epoch:train:5901-6000batch: iter_time=1.335e-04, forward_time=0.143, loss_ctc=66.798, loss_att=49.972, acc=0.708, loss=55.019, backward_time=1.023, grad_norm=108.107, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.179, optim0_lr0=5.691e-05, train_time=2.716 +[gpub002:0/64] 2023-07-12 16:11:22,903 (trainer:732) INFO: 40epoch:train:6001-6100batch: iter_time=1.237e-04, forward_time=0.143, loss_ctc=76.895, loss_att=56.337, acc=0.716, loss=62.504, backward_time=1.024, grad_norm=192.818, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.179, optim0_lr0=5.690e-05, train_time=2.713 +[gpub002:0/64] 2023-07-12 16:13:38,302 
(trainer:732) INFO: 40epoch:train:6101-6200batch: iter_time=1.439e-04, forward_time=0.144, loss_ctc=73.992, loss_att=52.793, acc=0.718, loss=59.152, backward_time=1.022, grad_norm=97.949, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.179, optim0_lr0=5.690e-05, train_time=2.708 +[gpub002:0/64] 2023-07-12 16:15:53,887 (trainer:732) INFO: 40epoch:train:6201-6300batch: iter_time=1.375e-04, forward_time=0.144, loss_ctc=69.085, loss_att=53.781, acc=0.709, loss=58.372, backward_time=1.024, grad_norm=107.327, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.179, optim0_lr0=5.689e-05, train_time=2.711 +[gpub002:0/64] 2023-07-12 16:18:09,295 (trainer:732) INFO: 40epoch:train:6301-6400batch: iter_time=1.276e-04, forward_time=0.145, loss_ctc=67.794, loss_att=47.222, acc=0.705, loss=53.394, backward_time=1.023, grad_norm=105.215, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.179, optim0_lr0=5.688e-05, train_time=2.708 +[gpub002:0/64] 2023-07-12 16:20:24,498 (trainer:732) INFO: 40epoch:train:6401-6500batch: iter_time=1.279e-04, forward_time=0.145, loss_ctc=65.690, loss_att=47.722, acc=0.718, loss=53.112, backward_time=1.023, grad_norm=125.378, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.180, optim0_lr0=5.687e-05, train_time=2.704 +[gpub002:0/64] 2023-07-12 16:22:12,380 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub002:0/64] 2023-07-12 16:22:39,598 (trainer:732) INFO: 40epoch:train:6501-6600batch: iter_time=1.096e-04, forward_time=0.144, loss_ctc=64.236, loss_att=49.951, acc=0.705, loss=54.237, backward_time=1.023, grad_norm=93.202, clip=100.000, loss_scale=5.828e+32, optim_step_time=0.180, optim0_lr0=5.687e-05, train_time=2.702 +[gpub002:0/64] 2023-07-12 16:24:14,970 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
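The "grad norm is nan. Skipping updating the model." warnings (14:06:37 and 16:22:12 above) are the usual mixed-precision guard: when the dynamic loss scale (`loss_scale` in these entries) overflows, gradients come out non-finite and the optimizer step is skipped instead of applied. A minimal sketch of that pattern, assuming the logged clip threshold of 100 (`clip=100.000`); this is not the espnet2 trainer code itself:

```python
import torch

def clip_and_step(model: torch.nn.Module,
                  optimizer: torch.optim.Optimizer,
                  max_norm: float = 100.0) -> bool:
    """Clip gradients and step; skip the update when the norm is non-finite."""
    grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
    if not torch.isfinite(grad_norm):
        # Mirrors the logged behavior: the model is left untouched for
        # this batch and training continues with the next one.
        optimizer.zero_grad()
        return False
    optimizer.step()
    optimizer.zero_grad()
    return True
```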
+[gpub002:0/64] 2023-07-12 16:24:32,839 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 16:24:36,515 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 16:24:36,515 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub002:0/64] 2023-07-12 16:24:36,521 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 16:28:25,470 (trainer:732) INFO: 40epoch:train:6601-6700batch: iter_time=1.255, forward_time=0.145, loss_ctc=68.337, loss_att=49.507, acc=0.724, loss=55.156, backward_time=1.038, grad_norm=116.207, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.686e-05, train_time=6.917 +[gpub002:0/64] 2023-07-12 16:30:42,084 (trainer:732) INFO: 40epoch:train:6701-6800batch: iter_time=1.329e-04, forward_time=0.145, loss_ctc=62.314, loss_att=47.201, acc=0.706, loss=51.735, backward_time=1.025, grad_norm=98.754, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.685e-05, train_time=2.732 +[gpub002:0/64] 2023-07-12 16:32:57,908 (trainer:732) INFO: 40epoch:train:6801-6900batch: iter_time=1.346e-04, forward_time=0.145, loss_ctc=68.481, loss_att=52.287, acc=0.727, loss=57.145, backward_time=1.026, grad_norm=109.277, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.684e-05, train_time=2.716 +[gpub002:0/64] 2023-07-12 16:35:13,486 (trainer:732) INFO: 40epoch:train:6901-7000batch: iter_time=1.281e-04, forward_time=0.144, loss_ctc=79.843, loss_att=57.285, acc=0.717, loss=64.053, backward_time=1.026, grad_norm=130.573, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.684e-05, train_time=2.711 +[gpub002:0/64] 2023-07-12 16:37:29,332 (trainer:732) INFO: 40epoch:train:7001-7100batch: iter_time=1.175e-04, forward_time=0.145, loss_ctc=67.417, loss_att=53.169, acc=0.721, loss=57.444, backward_time=1.027, grad_norm=147.307, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.683e-05, train_time=2.717 +[gpub002:0/64] 2023-07-12 16:39:44,891 (trainer:732) INFO: 40epoch:train:7101-7200batch: iter_time=1.272e-04, forward_time=0.145, loss_ctc=70.180, loss_att=50.528, acc=0.727, loss=56.424, backward_time=1.024, grad_norm=107.531, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.682e-05, train_time=2.711 +[gpub002:0/64] 2023-07-12 16:42:05,273 (trainer:732) INFO: 40epoch:train:7201-7300batch: iter_time=1.098e-04, forward_time=0.144, loss_ctc=63.893, loss_att=44.742, acc=0.708, loss=50.488, backward_time=1.023, grad_norm=103.497, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.681e-05, train_time=2.807 +[gpub002:0/64] 2023-07-12 16:44:20,746 (trainer:732) INFO: 40epoch:train:7301-7400batch: iter_time=1.136e-04, forward_time=0.143, loss_ctc=67.728, loss_att=50.827, acc=0.719, loss=55.897, backward_time=1.024, grad_norm=118.243, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, 
optim0_lr0=5.681e-05, train_time=2.709 +[gpub002:0/64] 2023-07-12 16:46:36,313 (trainer:732) INFO: 40epoch:train:7401-7500batch: iter_time=1.123e-04, forward_time=0.143, loss_ctc=69.549, loss_att=54.017, acc=0.715, loss=58.676, backward_time=1.024, grad_norm=105.138, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.680e-05, train_time=2.711 +[gpub002:0/64] 2023-07-12 16:46:39,103 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpub002:0/64] 2023-07-12 16:46:57,326 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 16:47:00,716 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 16:47:00,716 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub002:0/64] 2023-07-12 16:47:00,722 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 16:52:43,273 (trainer:732) INFO: 40epoch:train:7501-7600batch: iter_time=1.295, forward_time=0.144, loss_ctc=58.813, loss_att=42.145, acc=0.710, loss=47.145, backward_time=1.037, grad_norm=124.111, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.679e-05, train_time=7.339 +[gpub002:0/64] 2023-07-12 16:54:59,882 (trainer:732) INFO: 40epoch:train:7601-7700batch: iter_time=1.243e-04, forward_time=0.144, loss_ctc=68.732, loss_att=53.937, acc=0.713, loss=58.375, backward_time=1.029, grad_norm=130.599, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.679e-05, train_time=2.732 +[gpub002:0/64] 2023-07-12 16:57:15,353 (trainer:732) INFO: 40epoch:train:7701-7800batch: iter_time=1.224e-04, forward_time=0.145, loss_ctc=77.851, loss_att=55.826, acc=0.727, loss=62.433, backward_time=1.025, grad_norm=121.431, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.678e-05, train_time=2.709 +[gpub002:0/64] 2023-07-12 16:59:30,902 (trainer:732) INFO: 40epoch:train:7801-7900batch: iter_time=1.444e-04, forward_time=0.144, loss_ctc=70.346, loss_att=53.701, acc=0.708, loss=58.695, backward_time=1.026, grad_norm=112.362, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.677e-05, train_time=2.711 +[gpub002:0/64] 2023-07-12 17:01:46,962 (trainer:732) INFO: 40epoch:train:7901-8000batch: iter_time=1.644e-04, forward_time=0.145, loss_ctc=69.909, loss_att=51.923, acc=0.720, loss=57.319, backward_time=1.024, grad_norm=115.655, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.676e-05, train_time=2.721 +[gpub002:0/64] 2023-07-12 17:04:02,181 (trainer:732) INFO: 40epoch:train:8001-8100batch: iter_time=1.097e-04, forward_time=0.143, loss_ctc=64.797, loss_att=45.585, acc=0.695, loss=51.348, backward_time=1.021, grad_norm=96.860, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.676e-05, train_time=2.704 +[gpub002:0/64] 2023-07-12 17:06:18,006 (trainer:732) INFO: 40epoch:train:8101-8200batch: iter_time=1.347e-04, forward_time=0.144, loss_ctc=65.946, loss_att=48.537, 
acc=0.721, loss=53.759, backward_time=1.025, grad_norm=103.143, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.675e-05, train_time=2.716 +[gpub002:0/64] 2023-07-12 17:08:43,714 (trainer:732) INFO: 40epoch:train:8201-8300batch: iter_time=1.585e-04, forward_time=0.144, loss_ctc=66.556, loss_att=50.587, acc=0.709, loss=55.378, backward_time=1.033, grad_norm=104.722, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.674e-05, train_time=2.914 +[gpub002:0/64] 2023-07-12 17:09:31,672 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub002:0/64] 2023-07-12 17:09:49,815 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 17:09:53,285 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 17:09:53,285 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub002:0/64] 2023-07-12 17:09:53,291 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 17:16:05,942 (trainer:732) INFO: 40epoch:train:8301-8400batch: iter_time=1.764, forward_time=0.145, loss_ctc=61.574, loss_att=47.462, acc=0.713, loss=51.695, backward_time=1.040, grad_norm=100.876, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.673e-05, train_time=8.844 +[gpub002:0/64] 2023-07-12 17:18:22,844 (trainer:732) INFO: 40epoch:train:8401-8500batch: iter_time=1.222e-04, forward_time=0.144, loss_ctc=66.028, loss_att=49.519, acc=0.712, loss=54.472, backward_time=1.025, grad_norm=100.831, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.673e-05, train_time=2.738 +[gpub002:0/64] 2023-07-12 17:20:39,127 (trainer:732) INFO: 40epoch:train:8501-8600batch: iter_time=1.118e-04, forward_time=0.144, loss_ctc=75.776, loss_att=55.115, acc=0.722, loss=61.313, backward_time=1.028, grad_norm=133.650, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.672e-05, train_time=2.725 +[gpub002:0/64] 2023-07-12 17:22:54,520 (trainer:732) INFO: 40epoch:train:8601-8700batch: iter_time=1.148e-04, forward_time=0.143, loss_ctc=71.264, loss_att=51.115, acc=0.722, loss=57.159, backward_time=1.024, grad_norm=108.773, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.671e-05, train_time=2.708 +[gpub002:0/64] 2023-07-12 17:25:10,311 (trainer:732) INFO: 40epoch:train:8701-8800batch: iter_time=1.039e-04, forward_time=0.144, loss_ctc=70.093, loss_att=53.908, acc=0.711, loss=58.763, backward_time=1.027, grad_norm=115.711, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.671e-05, train_time=2.716 +[gpub002:0/64] 2023-07-12 17:27:25,641 (trainer:732) INFO: 40epoch:train:8801-8900batch: iter_time=1.147e-04, forward_time=0.143, loss_ctc=65.537, loss_att=45.778, acc=0.707, loss=51.706, backward_time=1.025, grad_norm=109.775, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.670e-05, train_time=2.706 +[gpub002:0/64] 2023-07-12 17:29:40,885 
(trainer:732) INFO: 40epoch:train:8901-9000batch: iter_time=1.186e-04, forward_time=0.143, loss_ctc=64.933, loss_att=47.129, acc=0.721, loss=52.470, backward_time=1.024, grad_norm=108.344, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.669e-05, train_time=2.705 +[gpub002:0/64] 2023-07-12 17:31:56,246 (trainer:732) INFO: 40epoch:train:9001-9100batch: iter_time=1.121e-04, forward_time=0.143, loss_ctc=64.752, loss_att=50.086, acc=0.708, loss=54.486, backward_time=1.024, grad_norm=114.341, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.668e-05, train_time=2.707 +[gpub002:0/64] 2023-07-12 17:33:29,921 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub002:0/64] 2023-07-12 17:33:48,221 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 17:33:51,742 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 17:33:51,743 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub002:0/64] 2023-07-12 17:33:51,749 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 17:38:46,300 (trainer:732) INFO: 40epoch:train:9101-9200batch: iter_time=1.947, forward_time=0.180, loss_ctc=68.492, loss_att=49.647, acc=0.722, loss=55.300, backward_time=1.038, grad_norm=117.017, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.668e-05, train_time=8.201 +[gpub002:0/64] 2023-07-12 17:41:02,935 (trainer:732) INFO: 40epoch:train:9201-9300batch: iter_time=1.099e-04, forward_time=0.145, loss_ctc=61.780, loss_att=48.408, acc=0.705, loss=52.420, backward_time=1.025, grad_norm=130.871, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.180, optim0_lr0=5.667e-05, train_time=2.733 +[gpub002:0/64] 2023-07-12 17:43:20,427 (trainer:732) INFO: 40epoch:train:9301-9400batch: iter_time=1.202e-04, forward_time=0.144, loss_ctc=68.219, loss_att=52.508, acc=0.732, loss=57.222, backward_time=1.027, grad_norm=114.930, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.666e-05, train_time=2.750 +[gpub002:0/64] 2023-07-12 17:45:36,344 (trainer:732) INFO: 40epoch:train:9401-9500batch: iter_time=9.479e-05, forward_time=0.142, loss_ctc=78.737, loss_att=56.410, acc=0.723, loss=63.108, backward_time=1.025, grad_norm=120.963, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.665e-05, train_time=2.718 +[gpub002:0/64] 2023-07-12 17:47:51,855 (trainer:732) INFO: 40epoch:train:9501-9600batch: iter_time=9.460e-05, forward_time=0.143, loss_ctc=66.156, loss_att=54.034, acc=0.715, loss=57.671, backward_time=1.023, grad_norm=119.804, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.665e-05, train_time=2.710 +[gpub002:0/64] 2023-07-12 17:50:07,301 (trainer:732) INFO: 40epoch:train:9601-9700batch: iter_time=9.742e-05, forward_time=0.143, loss_ctc=69.405, loss_att=50.896, acc=0.723, loss=56.449, backward_time=1.024, grad_norm=105.723, 
clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.664e-05, train_time=2.709 +[gpub002:0/64] 2023-07-12 17:52:22,352 (trainer:732) INFO: 40epoch:train:9701-9800batch: iter_time=1.037e-04, forward_time=0.142, loss_ctc=62.756, loss_att=43.537, acc=0.712, loss=49.303, backward_time=1.022, grad_norm=114.282, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.663e-05, train_time=2.701 +[gpub002:0/64] 2023-07-12 17:54:44,234 (trainer:732) INFO: 40epoch:train:9801-9900batch: iter_time=9.713e-05, forward_time=0.143, loss_ctc=67.843, loss_att=50.651, acc=0.721, loss=55.809, backward_time=1.030, grad_norm=112.352, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.663e-05, train_time=2.837 +[gpub002:0/64] 2023-07-12 17:57:01,502 (trainer:732) INFO: 40epoch:train:9901-10000batch: iter_time=9.865e-05, forward_time=0.141, loss_ctc=69.656, loss_att=53.667, acc=0.714, loss=58.464, backward_time=1.028, grad_norm=130.803, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.179, optim0_lr0=5.662e-05, train_time=2.745 +[gpub002:0/64] 2023-07-12 18:11:36,573 (trainer:338) INFO: 40epoch results: [train] iter_time=0.178, forward_time=0.145, loss_ctc=68.748, loss_att=51.163, acc=0.712, loss=56.438, backward_time=1.027, grad_norm=116.730, clip=100.000, loss_scale=3.679e+32, optim_step_time=0.180, optim0_lr0=5.698e-05, train_time=3.327, time=4 hours, 37 minutes and 31.92 seconds, total_count=370000, gpu_max_cached_mem_GB=34.277, [valid] loss_ctc=44.137, cer_ctc=0.263, loss_att=39.500, acc=0.667, cer=0.428, wer=1.000, loss=40.891, time=7 minutes and 44.49 seconds, total_count=37950, gpu_max_cached_mem_GB=37.572, [att_plot] time=6 minutes and 32.9 seconds, total_count=0, gpu_max_cached_mem_GB=37.572 +[gpub002:0/64] 2023-07-12 18:11:52,961 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpub002:0/64] 2023-07-12 18:11:53,003 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.acc": exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.acc.ave_5best.till40epoch.pth +[gpub002:0/64] 2023-07-12 18:12:44,008 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.total_count": exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.total_count.ave_5best.till40epoch.pth +[gpub002:0/64] 2023-07-12 18:13:08,270 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/35epoch.pth +[gpub002:0/64] 2023-07-12 18:13:08,326 (trainer:272) INFO: 41/50epoch started. Estimated time to finish: 2 days, 53 minutes and 30.81 seconds +[gpub002:0/64] 2023-07-12 18:13:09,583 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
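The two "Averaging 5best models" messages above produce valid.acc.ave_5best.till40epoch.pth and valid.total_count.ave_5best.till40epoch.pth by taking an element-wise mean over the parameters of the five best checkpoints under each criterion. A minimal sketch of that step, assuming each Xepoch.pth holds a plain PyTorch state dict; the helper name and usage paths are illustrative, and the real logic lives in espnet2's average_nbest_models:

    import torch

    def average_checkpoints(paths):
        """Element-wise mean of the parameter tensors stored in `paths`."""
        avg, dtypes = None, None
        for path in paths:
            state = torch.load(path, map_location="cpu")
            if avg is None:
                dtypes = {k: v.dtype for k, v in state.items()}
                avg = {k: v.double() for k, v in state.items()}
            else:
                for k, v in state.items():
                    avg[k] += v.double()
        # Accumulate in float64, then cast back to each tensor's original dtype.
        return {k: (v / len(paths)).to(dtypes[k]) for k, v in avg.items()}

    # Hypothetical usage with the five best epochs under valid.acc:
    # avg = average_checkpoints([f"{exp_dir}/{e}epoch.pth" for e in best5])
    # torch.save(avg, f"{exp_dir}/valid.acc.ave_5best.till40epoch.pth")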
+[gpub002:0/64] 2023-07-12 18:13:27,325 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 18:13:30,751 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 18:13:30,751 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub002:0/64] 2023-07-12 18:13:30,878 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 18:21:17,615 (trainer:732) INFO: 41epoch:train:1-100batch: iter_time=3.473, forward_time=0.171, loss_ctc=70.840, loss_att=55.420, acc=0.704, loss=60.046, backward_time=1.043, grad_norm=129.418, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.661e-05, train_time=9.772 +[gpub002:0/64] 2023-07-12 18:23:33,343 (trainer:732) INFO: 41epoch:train:101-200batch: iter_time=1.284e-04, forward_time=0.146, loss_ctc=67.748, loss_att=50.057, acc=0.705, loss=55.364, backward_time=1.028, grad_norm=114.263, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.660e-05, train_time=2.714 +[gpub002:0/64] 2023-07-12 18:25:50,201 (trainer:732) INFO: 41epoch:train:201-300batch: iter_time=1.232e-04, forward_time=0.152, loss_ctc=66.301, loss_att=51.918, acc=0.721, loss=56.233, backward_time=1.028, grad_norm=123.052, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.660e-05, train_time=2.737 +[gpub002:0/64] 2023-07-12 18:28:19,026 (trainer:732) INFO: 41epoch:train:301-400batch: iter_time=1.049e-04, forward_time=0.145, loss_ctc=75.210, loss_att=59.758, acc=0.716, loss=64.394, backward_time=1.054, grad_norm=126.516, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.659e-05, train_time=2.976 +[gpub002:0/64] 2023-07-12 18:30:40,311 (trainer:732) INFO: 41epoch:train:401-500batch: iter_time=1.063e-04, forward_time=0.146, loss_ctc=60.935, loss_att=47.224, acc=0.714, loss=51.338, backward_time=1.039, grad_norm=134.990, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.658e-05, train_time=2.825 +[gpub002:0/64] 2023-07-12 18:32:59,302 (trainer:732) INFO: 41epoch:train:501-600batch: iter_time=1.050e-04, forward_time=0.147, loss_ctc=75.633, loss_att=61.020, acc=0.718, loss=65.404, backward_time=1.032, grad_norm=146.761, clip=100.000, loss_scale=3.894e+32, optim_step_time=0.182, optim0_lr0=5.657e-05, train_time=2.780 +[gpub002:0/64] 2023-07-12 18:35:20,968 (trainer:732) INFO: 41epoch:train:601-700batch: iter_time=1.067e-04, forward_time=0.145, loss_ctc=70.522, loss_att=53.175, acc=0.708, loss=58.379, backward_time=1.040, grad_norm=114.115, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.657e-05, train_time=2.833 +[gpub002:0/64] 2023-07-12 18:37:47,401 (trainer:732) INFO: 41epoch:train:701-800batch: iter_time=1.123e-04, forward_time=0.145, loss_ctc=71.912, loss_att=54.658, acc=0.713, loss=59.834, backward_time=1.046, grad_norm=114.837, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.656e-05, 
train_time=2.928 +[gpub002:0/64] 2023-07-12 18:37:49,975 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub002:0/64] 2023-07-12 18:38:40,955 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub002:0/64] 2023-07-12 18:38:58,926 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 18:39:02,533 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 18:39:02,533 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub002:0/64] 2023-07-12 18:39:02,539 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 18:44:47,143 (trainer:732) INFO: 41epoch:train:801-900batch: iter_time=1.647, forward_time=0.146, loss_ctc=72.599, loss_att=54.115, acc=0.703, loss=59.660, backward_time=1.040, grad_norm=128.541, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.655e-05, train_time=8.395 +[gpub002:0/64] 2023-07-12 18:45:06,195 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub002:0/64] 2023-07-12 18:47:03,912 (trainer:732) INFO: 41epoch:train:901-1000batch: iter_time=1.239e-04, forward_time=0.145, loss_ctc=70.940, loss_att=54.535, acc=0.709, loss=59.457, backward_time=1.029, grad_norm=120.295, clip=100.000, loss_scale=1.821e+32, optim_step_time=0.182, optim0_lr0=5.655e-05, train_time=2.735 +[gpub002:0/64] 2023-07-12 18:49:19,533 (trainer:732) INFO: 41epoch:train:1001-1100batch: iter_time=1.115e-04, forward_time=0.145, loss_ctc=61.609, loss_att=44.306, acc=0.724, loss=49.497, backward_time=1.027, grad_norm=111.947, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.654e-05, train_time=2.712 +[gpub002:0/64] 2023-07-12 18:51:36,281 (trainer:732) INFO: 41epoch:train:1101-1200batch: iter_time=1.166e-04, forward_time=0.146, loss_ctc=73.032, loss_att=63.628, acc=0.705, loss=66.449, backward_time=1.034, grad_norm=164.160, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.653e-05, train_time=2.735 +[gpub002:0/64] 2023-07-12 18:53:52,356 (trainer:732) INFO: 41epoch:train:1201-1300batch: iter_time=1.133e-04, forward_time=0.146, loss_ctc=68.936, loss_att=48.977, acc=0.729, loss=54.965, backward_time=1.030, grad_norm=118.209, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.652e-05, train_time=2.721 +[gpub002:0/64] 2023-07-12 18:56:08,340 (trainer:732) INFO: 41epoch:train:1301-1400batch: iter_time=1.178e-04, forward_time=0.145, loss_ctc=72.457, loss_att=57.311, acc=0.711, loss=61.854, backward_time=1.030, grad_norm=123.795, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.652e-05, train_time=2.719 +[gpub002:0/64] 2023-07-12 18:58:24,327 (trainer:732) INFO: 41epoch:train:1401-1500batch: iter_time=1.172e-04, forward_time=0.145, loss_ctc=69.223, loss_att=53.363, acc=0.719, loss=58.121, backward_time=1.030, grad_norm=100.726, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, 
optim0_lr0=5.651e-05, train_time=2.720 +[gpub002:0/64] 2023-07-12 19:00:40,397 (trainer:732) INFO: 41epoch:train:1501-1600batch: iter_time=1.334e-04, forward_time=0.146, loss_ctc=71.626, loss_att=57.110, acc=0.711, loss=61.465, backward_time=1.031, grad_norm=128.152, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.650e-05, train_time=2.721 +[gpub002:0/64] 2023-07-12 19:02:11,569 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub002:0/64] 2023-07-12 19:02:29,844 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 19:02:33,282 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 19:02:33,282 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub002:0/64] 2023-07-12 19:02:33,289 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 19:08:06,638 (trainer:732) INFO: 41epoch:train:1601-1700batch: iter_time=1.676, forward_time=0.169, loss_ctc=70.421, loss_att=49.398, acc=0.710, loss=55.705, backward_time=1.041, grad_norm=138.098, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.650e-05, train_time=8.923 +[gpub002:0/64] 2023-07-12 19:10:23,384 (trainer:732) INFO: 41epoch:train:1701-1800batch: iter_time=1.274e-04, forward_time=0.146, loss_ctc=69.158, loss_att=55.267, acc=0.709, loss=59.434, backward_time=1.031, grad_norm=104.995, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.649e-05, train_time=2.736 +[gpub002:0/64] 2023-07-12 19:12:38,917 (trainer:732) INFO: 41epoch:train:1801-1900batch: iter_time=1.230e-04, forward_time=0.145, loss_ctc=66.374, loss_att=50.597, acc=0.699, loss=55.330, backward_time=1.027, grad_norm=112.628, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.648e-05, train_time=2.710 +[gpub002:0/64] 2023-07-12 19:14:54,622 (trainer:732) INFO: 41epoch:train:1901-2000batch: iter_time=1.230e-04, forward_time=0.145, loss_ctc=68.154, loss_att=57.325, acc=0.709, loss=60.574, backward_time=1.027, grad_norm=103.678, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.647e-05, train_time=2.714 +[gpub002:0/64] 2023-07-12 19:17:10,478 (trainer:732) INFO: 41epoch:train:2001-2100batch: iter_time=1.035e-04, forward_time=0.145, loss_ctc=72.381, loss_att=54.045, acc=0.729, loss=59.546, backward_time=1.027, grad_norm=114.921, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.647e-05, train_time=2.717 +[gpub002:0/64] 2023-07-12 19:19:26,375 (trainer:732) INFO: 41epoch:train:2101-2200batch: iter_time=1.268e-04, forward_time=0.145, loss_ctc=68.777, loss_att=54.166, acc=0.702, loss=58.549, backward_time=1.028, grad_norm=113.943, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.646e-05, train_time=2.718 +[gpub002:0/64] 2023-07-12 19:21:42,063 (trainer:732) INFO: 41epoch:train:2201-2300batch: iter_time=1.302e-04, forward_time=0.146, loss_ctc=63.800, 
loss_att=49.285, acc=0.717, loss=53.640, backward_time=1.027, grad_norm=146.299, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.645e-05, train_time=2.714 +[gpub002:0/64] 2023-07-12 19:23:57,840 (trainer:732) INFO: 41epoch:train:2301-2400batch: iter_time=1.250e-04, forward_time=0.145, loss_ctc=74.630, loss_att=56.323, acc=0.712, loss=61.815, backward_time=1.028, grad_norm=111.859, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.644e-05, train_time=2.715 +[gpub002:0/64] 2023-07-12 19:26:19,254 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub002:0/64] 2023-07-12 19:26:37,400 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 19:26:40,808 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 19:26:40,808 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub002:0/64] 2023-07-12 19:26:40,814 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 19:29:54,444 (trainer:732) INFO: 41epoch:train:2401-2500batch: iter_time=2.164, forward_time=0.165, loss_ctc=73.293, loss_att=57.191, acc=0.704, loss=62.022, backward_time=1.035, grad_norm=154.823, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.644e-05, train_time=7.132 +[gpub002:0/64] 2023-07-12 19:32:11,981 (trainer:732) INFO: 41epoch:train:2501-2600batch: iter_time=1.086e-04, forward_time=0.145, loss_ctc=70.299, loss_att=55.298, acc=0.703, loss=59.798, backward_time=1.037, grad_norm=109.577, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.643e-05, train_time=2.751 +[gpub002:0/64] 2023-07-12 19:34:27,583 (trainer:732) INFO: 41epoch:train:2601-2700batch: iter_time=1.227e-04, forward_time=0.145, loss_ctc=65.933, loss_att=48.206, acc=0.704, loss=53.524, backward_time=1.028, grad_norm=91.627, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.642e-05, train_time=2.712 +[gpub002:0/64] 2023-07-12 19:36:43,388 (trainer:732) INFO: 41epoch:train:2701-2800batch: iter_time=1.208e-04, forward_time=0.147, loss_ctc=64.547, loss_att=51.693, acc=0.716, loss=55.549, backward_time=1.027, grad_norm=123.357, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.642e-05, train_time=2.716 +[gpub002:0/64] 2023-07-12 19:38:59,319 (trainer:732) INFO: 41epoch:train:2801-2900batch: iter_time=1.147e-04, forward_time=0.145, loss_ctc=74.813, loss_att=58.932, acc=0.715, loss=63.696, backward_time=1.030, grad_norm=122.567, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.641e-05, train_time=2.718 +[gpub002:0/64] 2023-07-12 19:41:15,045 (trainer:732) INFO: 41epoch:train:2901-3000batch: iter_time=1.152e-04, forward_time=0.145, loss_ctc=61.440, loss_att=45.559, acc=0.725, loss=50.324, backward_time=1.028, grad_norm=124.359, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.640e-05, train_time=2.714 +[gpub002:0/64] 
2023-07-12 19:43:31,121 (trainer:732) INFO: 41epoch:train:3001-3100batch: iter_time=1.234e-04, forward_time=0.146, loss_ctc=73.113, loss_att=58.408, acc=0.712, loss=62.819, backward_time=1.028, grad_norm=132.870, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.639e-05, train_time=2.721 +[gpub002:0/64] 2023-07-12 19:45:49,866 (trainer:732) INFO: 41epoch:train:3101-3200batch: iter_time=1.274e-04, forward_time=0.145, loss_ctc=70.690, loss_att=51.235, acc=0.716, loss=57.071, backward_time=1.033, grad_norm=113.301, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.639e-05, train_time=2.775 +[gpub002:0/64] 2023-07-12 19:48:12,230 (trainer:732) INFO: 41epoch:train:3201-3300batch: iter_time=1.179e-04, forward_time=0.145, loss_ctc=72.441, loss_att=57.254, acc=0.711, loss=61.810, backward_time=1.033, grad_norm=107.815, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.638e-05, train_time=2.847 +[gpub002:0/64] 2023-07-12 19:49:02,365 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub002:0/64] 2023-07-12 19:49:20,319 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 19:49:23,731 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 19:49:23,731 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub002:0/64] 2023-07-12 19:49:23,737 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 19:54:17,214 (trainer:732) INFO: 41epoch:train:3301-3400batch: iter_time=1.844, forward_time=0.147, loss_ctc=67.576, loss_att=49.521, acc=0.713, loss=54.938, backward_time=1.043, grad_norm=133.347, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.637e-05, train_time=7.299 +[gpub002:0/64] 2023-07-12 19:56:33,870 (trainer:732) INFO: 41epoch:train:3401-3500batch: iter_time=1.057e-04, forward_time=0.146, loss_ctc=70.531, loss_att=54.713, acc=0.713, loss=59.458, backward_time=1.030, grad_norm=130.288, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.637e-05, train_time=2.733 +[gpub002:0/64] 2023-07-12 19:58:49,517 (trainer:732) INFO: 41epoch:train:3501-3600batch: iter_time=1.089e-04, forward_time=0.145, loss_ctc=60.897, loss_att=42.699, acc=0.730, loss=48.158, backward_time=1.028, grad_norm=137.043, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.636e-05, train_time=2.713 +[gpub002:0/64] 2023-07-12 20:01:05,892 (trainer:732) INFO: 41epoch:train:3601-3700batch: iter_time=1.232e-04, forward_time=0.146, loss_ctc=74.026, loss_att=64.166, acc=0.709, loss=67.124, backward_time=1.032, grad_norm=127.815, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.635e-05, train_time=2.727 +[gpub002:0/64] 2023-07-12 20:03:21,618 (trainer:732) INFO: 41epoch:train:3701-3800batch: iter_time=1.125e-04, forward_time=0.145, loss_ctc=67.233, loss_att=48.554, acc=0.733, loss=54.158, backward_time=1.028, 
grad_norm=114.242, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.634e-05, train_time=2.714 +[gpub002:0/64] 2023-07-12 20:05:37,755 (trainer:732) INFO: 41epoch:train:3801-3900batch: iter_time=1.077e-04, forward_time=0.146, loss_ctc=70.919, loss_att=56.270, acc=0.718, loss=60.665, backward_time=1.031, grad_norm=122.264, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.634e-05, train_time=2.723 +[gpub002:0/64] 2023-07-12 20:07:53,980 (trainer:732) INFO: 41epoch:train:3901-4000batch: iter_time=1.098e-04, forward_time=0.146, loss_ctc=69.754, loss_att=53.402, acc=0.720, loss=58.308, backward_time=1.032, grad_norm=108.612, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.633e-05, train_time=2.724 +[gpub002:0/64] 2023-07-12 20:10:10,380 (trainer:732) INFO: 41epoch:train:4001-4100batch: iter_time=1.024e-04, forward_time=0.147, loss_ctc=70.275, loss_att=56.231, acc=0.716, loss=60.444, backward_time=1.032, grad_norm=117.778, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.632e-05, train_time=2.728 +[gpub002:0/64] 2023-07-12 20:11:39,912 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub002:0/64] 2023-07-12 20:11:58,290 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 20:12:01,692 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 20:12:01,692 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub002:0/64] 2023-07-12 20:12:01,698 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 20:17:26,480 (trainer:732) INFO: 41epoch:train:4101-4200batch: iter_time=1.566, forward_time=0.146, loss_ctc=72.772, loss_att=55.012, acc=0.708, loss=60.340, backward_time=1.042, grad_norm=117.600, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.632e-05, train_time=8.722 +[gpub002:0/64] 2023-07-12 20:19:42,895 (trainer:732) INFO: 41epoch:train:4201-4300batch: iter_time=1.236e-04, forward_time=0.146, loss_ctc=67.515, loss_att=51.939, acc=0.704, loss=56.612, backward_time=1.030, grad_norm=123.653, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.631e-05, train_time=2.728 +[gpub002:0/64] 2023-07-12 20:21:58,413 (trainer:732) INFO: 41epoch:train:4301-4400batch: iter_time=1.320e-04, forward_time=0.145, loss_ctc=61.290, loss_att=47.503, acc=0.716, loss=51.639, backward_time=1.025, grad_norm=112.693, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.630e-05, train_time=2.710 +[gpub002:0/64] 2023-07-12 20:24:14,368 (trainer:732) INFO: 41epoch:train:4401-4500batch: iter_time=1.325e-04, forward_time=0.145, loss_ctc=71.825, loss_att=62.339, acc=0.697, loss=65.185, backward_time=1.029, grad_norm=117.005, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.629e-05, train_time=2.719 +[gpub002:0/64] 2023-07-12 20:26:30,216 (trainer:732) INFO: 
41epoch:train:4501-4600batch: iter_time=1.354e-04, forward_time=0.145, loss_ctc=64.044, loss_att=44.250, acc=0.731, loss=50.189, backward_time=1.028, grad_norm=95.630, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.629e-05, train_time=2.717 +[gpub002:0/64] 2023-07-12 20:28:46,011 (trainer:732) INFO: 41epoch:train:4601-4700batch: iter_time=1.354e-04, forward_time=0.144, loss_ctc=72.970, loss_att=56.752, acc=0.705, loss=61.617, backward_time=1.029, grad_norm=111.368, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.628e-05, train_time=2.716 +[gpub002:0/64] 2023-07-12 20:31:02,650 (trainer:732) INFO: 41epoch:train:4701-4800batch: iter_time=1.450e-04, forward_time=0.144, loss_ctc=71.101, loss_att=53.521, acc=0.717, loss=58.795, backward_time=1.029, grad_norm=100.929, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.627e-05, train_time=2.733 +[gpub002:0/64] 2023-07-12 20:33:18,834 (trainer:732) INFO: 41epoch:train:4801-4900batch: iter_time=1.218e-04, forward_time=0.146, loss_ctc=72.523, loss_att=58.663, acc=0.707, loss=62.821, backward_time=1.031, grad_norm=127.161, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.627e-05, train_time=2.723 +[gpub002:0/64] 2023-07-12 20:35:36,747 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub002:0/64] 2023-07-12 20:35:54,833 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 20:35:58,239 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 20:35:58,239 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub002:0/64] 2023-07-12 20:35:58,246 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 20:41:04,776 (trainer:732) INFO: 41epoch:train:4901-5000batch: iter_time=1.597, forward_time=0.146, loss_ctc=69.320, loss_att=46.915, acc=0.725, loss=53.637, backward_time=1.038, grad_norm=113.892, clip=100.000, loss_scale=3.018e+32, optim_step_time=0.182, optim0_lr0=5.626e-05, train_time=9.319 +[gpub002:0/64] 2023-07-12 20:43:22,964 (trainer:732) INFO: 41epoch:train:5001-5100batch: iter_time=1.259e-04, forward_time=0.146, loss_ctc=68.557, loss_att=53.531, acc=0.716, loss=58.039, backward_time=1.036, grad_norm=109.852, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.625e-05, train_time=2.764 +[gpub002:0/64] 2023-07-12 20:45:38,905 (trainer:732) INFO: 41epoch:train:5101-5200batch: iter_time=1.167e-04, forward_time=0.146, loss_ctc=65.544, loss_att=48.308, acc=0.716, loss=53.479, backward_time=1.029, grad_norm=123.663, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.624e-05, train_time=2.719 +[gpub002:0/64] 2023-07-12 20:47:55,428 (trainer:732) INFO: 41epoch:train:5201-5300batch: iter_time=9.594e-05, forward_time=0.147, loss_ctc=69.697, loss_att=59.000, acc=0.717, loss=62.209, backward_time=1.031, grad_norm=109.695, clip=100.000, 
loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.624e-05, train_time=2.730 +[gpub002:0/64] 2023-07-12 20:50:11,373 (trainer:732) INFO: 41epoch:train:5301-5400batch: iter_time=1.157e-04, forward_time=0.145, loss_ctc=71.695, loss_att=51.848, acc=0.732, loss=57.802, backward_time=1.029, grad_norm=105.656, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.623e-05, train_time=2.719 +[gpub002:0/64] 2023-07-12 20:52:26,937 (trainer:732) INFO: 41epoch:train:5401-5500batch: iter_time=1.158e-04, forward_time=0.145, loss_ctc=69.713, loss_att=56.136, acc=0.698, loss=60.209, backward_time=1.026, grad_norm=124.763, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.622e-05, train_time=2.711 +[gpub002:0/64] 2023-07-12 20:54:42,808 (trainer:732) INFO: 41epoch:train:5501-5600batch: iter_time=1.183e-04, forward_time=0.145, loss_ctc=65.673, loss_att=49.982, acc=0.728, loss=54.690, backward_time=1.028, grad_norm=121.869, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.622e-05, train_time=2.717 +[gpub002:0/64] 2023-07-12 20:56:58,543 (trainer:732) INFO: 41epoch:train:5601-5700batch: iter_time=1.085e-04, forward_time=0.145, loss_ctc=72.242, loss_att=52.803, acc=0.723, loss=58.635, backward_time=1.027, grad_norm=111.473, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.621e-05, train_time=2.714 +[gpub002:0/64] 2023-07-12 20:59:14,728 (trainer:732) INFO: 41epoch:train:5701-5800batch: iter_time=9.577e-05, forward_time=0.145, loss_ctc=69.925, loss_att=54.850, acc=0.714, loss=59.373, backward_time=1.031, grad_norm=113.233, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.620e-05, train_time=2.723 +[gpub002:0/64] 2023-07-12 21:00:01,150 (multiple_iter_factory:32) INFO: Building 7th iter-factory... 
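In every record here the overall loss sits between loss_att and loss_ctc because it is a hybrid CTC/attention objective with ctc_weight = 0.3: in the 5701-5800 record above, 0.3 * 69.925 + 0.7 * 54.850 = 59.373, matching loss=59.373. A one-line sketch of the combination (the weight is inferred from the logged numbers, not read from the config):

    def hybrid_loss(loss_ctc, loss_att, ctc_weight=0.3):
        """Hybrid CTC/attention objective consistent with the logged values:
        loss = ctc_weight * loss_ctc + (1 - ctc_weight) * loss_att,
        e.g. 0.3 * 69.925 + 0.7 * 54.850 = 59.373 (record 5701-5800)."""
        return ctc_weight * loss_ctc + (1.0 - ctc_weight) * loss_att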
+[gpub002:0/64] 2023-07-12 21:00:19,189 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 21:00:22,555 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 21:00:22,555 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub002:0/64] 2023-07-12 21:00:22,561 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 21:05:30,215 (trainer:732) INFO: 41epoch:train:5801-5900batch: iter_time=1.645, forward_time=0.193, loss_ctc=72.625, loss_att=53.896, acc=0.723, loss=59.515, backward_time=1.042, grad_norm=126.902, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.619e-05, train_time=7.509 +[gpub002:0/64] 2023-07-12 21:07:46,821 (trainer:732) INFO: 41epoch:train:5901-6000batch: iter_time=1.429e-04, forward_time=0.147, loss_ctc=66.989, loss_att=49.786, acc=0.709, loss=54.947, backward_time=1.029, grad_norm=130.905, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.619e-05, train_time=2.733 +[gpub002:0/64] 2023-07-12 21:10:03,539 (trainer:732) INFO: 41epoch:train:6001-6100batch: iter_time=1.134e-04, forward_time=0.149, loss_ctc=64.189, loss_att=51.518, acc=0.726, loss=55.319, backward_time=1.031, grad_norm=103.534, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.618e-05, train_time=2.734 +[gpub002:0/64] 2023-07-12 21:12:19,396 (trainer:732) INFO: 41epoch:train:6101-6200batch: iter_time=1.098e-04, forward_time=0.146, loss_ctc=72.559, loss_att=55.954, acc=0.724, loss=60.936, backward_time=1.029, grad_norm=122.019, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.617e-05, train_time=2.717 +[gpub002:0/64] 2023-07-12 21:14:35,307 (trainer:732) INFO: 41epoch:train:6201-6300batch: iter_time=1.148e-04, forward_time=0.146, loss_ctc=64.191, loss_att=50.498, acc=0.713, loss=54.606, backward_time=1.028, grad_norm=114.048, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.617e-05, train_time=2.718 +[gpub002:0/64] 2023-07-12 21:15:07,857 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub002:0/64] 2023-07-12 21:16:51,122 (trainer:732) INFO: 41epoch:train:6301-6400batch: iter_time=1.005e-04, forward_time=0.146, loss_ctc=70.103, loss_att=54.476, acc=0.732, loss=59.164, backward_time=1.030, grad_norm=103.656, clip=100.000, loss_scale=1.987e+32, optim_step_time=0.183, optim0_lr0=5.616e-05, train_time=2.716 +[gpub002:0/64] 2023-07-12 21:17:42,514 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. 
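The two warnings above are mixed-precision overflow handling: when the unscaled gradient norm comes back non-finite, the optimizer step is skipped and the dynamic loss scale is backed off, which is why loss_scale falls from 3.245e+32 toward 8.113e+31 over the surrounding records (the printed value is an average over each 100-batch window, so intermediate values appear). A minimal sketch of one such update with torch.cuda.amp.GradScaler, mirroring this behaviour under illustrative names:

    import torch

    def amp_step(scaler, optimizer, model, loss, max_norm=100.0):
        """One AMP update: clip to max_norm (clip=100.000 in the log) and
        skip the step on a non-finite gradient, as in the warnings above."""
        scaler.scale(loss).backward()
        scaler.unscale_(optimizer)                      # grads back to true scale
        grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
        if not torch.isfinite(grad_norm):
            print("The grad norm is nan. Skipping updating the model.")
        scaler.step(optimizer)   # no-op if non-finite grads were found
        scaler.update()          # backoff (x0.5) on overflow, growth after a streak
        optimizer.zero_grad()
        return grad_norm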
+[gpub002:0/64] 2023-07-12 21:19:06,809 (trainer:732) INFO: 41epoch:train:6401-6500batch: iter_time=1.398e-04, forward_time=0.146, loss_ctc=70.713, loss_att=52.907, acc=0.719, loss=58.249, backward_time=1.028, grad_norm=129.552, clip=100.000, loss_scale=1.109e+32, optim_step_time=0.182, optim0_lr0=5.615e-05, train_time=2.714 +[gpub002:0/64] 2023-07-12 21:21:23,047 (trainer:732) INFO: 41epoch:train:6501-6600batch: iter_time=1.335e-04, forward_time=0.147, loss_ctc=71.542, loss_att=55.074, acc=0.720, loss=60.015, backward_time=1.031, grad_norm=119.497, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.615e-05, train_time=2.725 +[gpub002:0/64] 2023-07-12 21:23:04,288 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub002:0/64] 2023-07-12 21:23:22,599 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 21:23:26,047 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 21:23:26,047 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub002:0/64] 2023-07-12 21:23:26,053 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 21:28:01,974 (trainer:732) INFO: 41epoch:train:6601-6700batch: iter_time=2.543, forward_time=0.146, loss_ctc=71.485, loss_att=51.860, acc=0.717, loss=57.748, backward_time=1.039, grad_norm=121.242, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.614e-05, train_time=7.978 +[gpub002:0/64] 2023-07-12 21:30:19,098 (trainer:732) INFO: 41epoch:train:6701-6800batch: iter_time=1.407e-04, forward_time=0.145, loss_ctc=67.783, loss_att=52.647, acc=0.706, loss=57.188, backward_time=1.030, grad_norm=102.934, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.613e-05, train_time=2.742 +[gpub002:0/64] 2023-07-12 21:32:35,143 (trainer:732) INFO: 41epoch:train:6801-6900batch: iter_time=1.370e-04, forward_time=0.149, loss_ctc=60.432, loss_att=44.809, acc=0.723, loss=49.496, backward_time=1.030, grad_norm=143.111, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.612e-05, train_time=2.721 +[gpub002:0/64] 2023-07-12 21:34:51,046 (trainer:732) INFO: 41epoch:train:6901-7000batch: iter_time=1.716e-04, forward_time=0.147, loss_ctc=73.656, loss_att=65.329, acc=0.698, loss=67.827, backward_time=1.029, grad_norm=127.853, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.612e-05, train_time=2.718 +[gpub002:0/64] 2023-07-12 21:37:06,844 (trainer:732) INFO: 41epoch:train:7001-7100batch: iter_time=1.519e-04, forward_time=0.147, loss_ctc=63.569, loss_att=45.390, acc=0.737, loss=50.844, backward_time=1.028, grad_norm=115.348, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.611e-05, train_time=2.716 +[gpub002:0/64] 2023-07-12 21:39:22,832 (trainer:732) INFO: 41epoch:train:7101-7200batch: iter_time=1.483e-04, forward_time=0.147, loss_ctc=73.348, loss_att=57.141, acc=0.707, loss=62.003, 
backward_time=1.030, grad_norm=125.156, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.610e-05, train_time=2.720 +[gpub002:0/64] 2023-07-12 21:41:38,634 (trainer:732) INFO: 41epoch:train:7201-7300batch: iter_time=1.409e-04, forward_time=0.145, loss_ctc=68.433, loss_att=52.831, acc=0.715, loss=57.512, backward_time=1.026, grad_norm=117.453, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.610e-05, train_time=2.716 +[gpub002:0/64] 2023-07-12 21:43:54,806 (trainer:732) INFO: 41epoch:train:7301-7400batch: iter_time=1.454e-04, forward_time=0.148, loss_ctc=69.535, loss_att=57.942, acc=0.709, loss=61.420, backward_time=1.031, grad_norm=142.520, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.609e-05, train_time=2.723 +[gpub002:0/64] 2023-07-12 21:46:12,782 (trainer:732) INFO: 41epoch:train:7401-7500batch: iter_time=1.376e-04, forward_time=0.147, loss_ctc=67.374, loss_att=45.684, acc=0.729, loss=52.191, backward_time=1.031, grad_norm=135.947, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.608e-05, train_time=2.759 +[gpub002:0/64] 2023-07-12 21:46:15,854 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpub002:0/64] 2023-07-12 21:46:34,163 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 21:46:37,582 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 21:46:37,582 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub002:0/64] 2023-07-12 21:46:37,588 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 21:54:09,248 (trainer:732) INFO: 41epoch:train:7501-7600batch: iter_time=1.603, forward_time=0.145, loss_ctc=71.396, loss_att=58.603, acc=0.700, loss=62.441, backward_time=1.042, grad_norm=141.498, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.607e-05, train_time=9.529 +[gpub002:0/64] 2023-07-12 21:56:25,789 (trainer:732) INFO: 41epoch:train:7601-7700batch: iter_time=1.471e-04, forward_time=0.144, loss_ctc=66.114, loss_att=48.625, acc=0.707, loss=53.872, backward_time=1.029, grad_norm=119.762, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.607e-05, train_time=2.731 +[gpub002:0/64] 2023-07-12 21:58:41,647 (trainer:732) INFO: 41epoch:train:7701-7800batch: iter_time=1.349e-04, forward_time=0.145, loss_ctc=63.466, loss_att=50.427, acc=0.716, loss=54.339, backward_time=1.028, grad_norm=113.922, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.606e-05, train_time=2.717 +[gpub002:0/64] 2023-07-12 22:00:59,471 (trainer:732) INFO: 41epoch:train:7801-7900batch: iter_time=1.247e-04, forward_time=0.147, loss_ctc=72.326, loss_att=56.756, acc=0.720, loss=61.427, backward_time=1.031, grad_norm=109.457, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.605e-05, train_time=2.756 +[gpub002:0/64] 2023-07-12 22:03:19,497 (trainer:732) INFO: 
41epoch:train:7901-8000batch: iter_time=1.212e-04, forward_time=0.145, loss_ctc=60.819, loss_att=44.765, acc=0.722, loss=49.581, backward_time=1.033, grad_norm=106.824, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.605e-05, train_time=2.800 +[gpub002:0/64] 2023-07-12 22:05:35,694 (trainer:732) INFO: 41epoch:train:8001-8100batch: iter_time=1.268e-04, forward_time=0.146, loss_ctc=75.424, loss_att=59.810, acc=0.714, loss=64.495, backward_time=1.030, grad_norm=123.469, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.604e-05, train_time=2.724 +[gpub002:0/64] 2023-07-12 22:07:51,699 (trainer:732) INFO: 41epoch:train:8101-8200batch: iter_time=1.220e-04, forward_time=0.147, loss_ctc=69.288, loss_att=51.179, acc=0.716, loss=56.612, backward_time=1.029, grad_norm=117.088, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.603e-05, train_time=2.720 +[gpub002:0/64] 2023-07-12 22:10:07,627 (trainer:732) INFO: 41epoch:train:8201-8300batch: iter_time=1.312e-04, forward_time=0.145, loss_ctc=72.391, loss_att=55.439, acc=0.714, loss=60.525, backward_time=1.029, grad_norm=127.931, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.603e-05, train_time=2.718 +[gpub002:0/64] 2023-07-12 22:10:55,464 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub002:0/64] 2023-07-12 22:11:13,825 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 22:11:17,494 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 22:11:17,494 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub002:0/64] 2023-07-12 22:11:17,500 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 22:18:38,342 (trainer:732) INFO: 41epoch:train:8301-8400batch: iter_time=1.615, forward_time=0.147, loss_ctc=67.249, loss_att=51.366, acc=0.704, loss=56.131, backward_time=1.046, grad_norm=129.432, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.602e-05, train_time=10.214 +[gpub002:0/64] 2023-07-12 22:20:55,423 (trainer:732) INFO: 41epoch:train:8401-8500batch: iter_time=9.569e-05, forward_time=0.145, loss_ctc=68.689, loss_att=52.556, acc=0.712, loss=57.396, backward_time=1.031, grad_norm=129.356, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.601e-05, train_time=2.741 +[gpub002:0/64] 2023-07-12 22:23:11,495 (trainer:732) INFO: 41epoch:train:8501-8600batch: iter_time=9.187e-05, forward_time=0.146, loss_ctc=62.059, loss_att=44.195, acc=0.721, loss=49.554, backward_time=1.028, grad_norm=111.154, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.600e-05, train_time=2.721 +[gpub002:0/64] 2023-07-12 22:25:27,686 (trainer:732) INFO: 41epoch:train:8601-8700batch: iter_time=1.103e-04, forward_time=0.146, loss_ctc=71.896, loss_att=62.165, acc=0.700, loss=65.084, backward_time=1.030, grad_norm=133.087, clip=100.000, 
loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.600e-05, train_time=2.724 +[gpub002:0/64] 2023-07-12 22:27:43,333 (trainer:732) INFO: 41epoch:train:8701-8800batch: iter_time=1.105e-04, forward_time=0.145, loss_ctc=68.306, loss_att=48.020, acc=0.733, loss=54.106, backward_time=1.027, grad_norm=109.698, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.599e-05, train_time=2.713 +[gpub002:0/64] 2023-07-12 22:29:58,990 (trainer:732) INFO: 41epoch:train:8801-8900batch: iter_time=1.056e-04, forward_time=0.145, loss_ctc=70.034, loss_att=54.974, acc=0.709, loss=59.492, backward_time=1.026, grad_norm=125.573, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.598e-05, train_time=2.713 +[gpub002:0/64] 2023-07-12 22:32:14,802 (trainer:732) INFO: 41epoch:train:8901-9000batch: iter_time=9.824e-05, forward_time=0.146, loss_ctc=70.258, loss_att=54.489, acc=0.715, loss=59.220, backward_time=1.028, grad_norm=114.409, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.598e-05, train_time=2.716 +[gpub002:0/64] 2023-07-12 22:34:31,075 (trainer:732) INFO: 41epoch:train:9001-9100batch: iter_time=1.091e-04, forward_time=0.146, loss_ctc=68.492, loss_att=54.068, acc=0.716, loss=58.395, backward_time=1.029, grad_norm=140.544, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.597e-05, train_time=2.725 +[gpub002:0/64] 2023-07-12 22:36:03,875 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub002:0/64] 2023-07-12 22:36:22,352 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 22:36:26,145 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 22:36:26,145 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub002:0/64] 2023-07-12 22:36:26,151 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 22:42:43,033 (trainer:732) INFO: 41epoch:train:9101-9200batch: iter_time=1.881, forward_time=0.184, loss_ctc=68.469, loss_att=50.264, acc=0.718, loss=55.726, backward_time=1.044, grad_norm=106.621, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.596e-05, train_time=9.838 +[gpub002:0/64] 2023-07-12 22:45:00,788 (trainer:732) INFO: 41epoch:train:9201-9300batch: iter_time=1.293e-04, forward_time=0.148, loss_ctc=68.471, loss_att=53.732, acc=0.720, loss=58.153, backward_time=1.035, grad_norm=122.009, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.596e-05, train_time=2.755 +[gpub002:0/64] 2023-07-12 22:47:19,808 (trainer:732) INFO: 41epoch:train:9301-9400batch: iter_time=1.176e-04, forward_time=0.147, loss_ctc=65.914, loss_att=48.978, acc=0.714, loss=54.059, backward_time=1.037, grad_norm=109.971, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.595e-05, train_time=2.780 +[gpub002:0/64] 2023-07-12 22:49:36,096 (trainer:732) INFO: 41epoch:train:9401-9500batch: iter_time=1.167e-04, 
forward_time=0.146, loss_ctc=68.014, loss_att=54.562, acc=0.726, loss=58.597, backward_time=1.029, grad_norm=117.199, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.594e-05, train_time=2.726 +[gpub002:0/64] 2023-07-12 22:51:51,997 (trainer:732) INFO: 41epoch:train:9501-9600batch: iter_time=1.252e-04, forward_time=0.146, loss_ctc=71.249, loss_att=53.735, acc=0.733, loss=58.989, backward_time=1.029, grad_norm=138.178, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.593e-05, train_time=2.718 +[gpub002:0/64] 2023-07-12 22:54:07,882 (trainer:732) INFO: 41epoch:train:9601-9700batch: iter_time=1.247e-04, forward_time=0.147, loss_ctc=66.076, loss_att=52.907, acc=0.713, loss=56.858, backward_time=1.028, grad_norm=106.233, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.593e-05, train_time=2.717 +[gpub002:0/64] 2023-07-12 22:56:23,823 (trainer:732) INFO: 41epoch:train:9701-9800batch: iter_time=1.160e-04, forward_time=0.146, loss_ctc=62.985, loss_att=48.457, acc=0.729, loss=52.816, backward_time=1.029, grad_norm=112.368, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.592e-05, train_time=2.719 +[gpub002:0/64] 2023-07-12 22:58:40,560 (trainer:732) INFO: 41epoch:train:9801-9900batch: iter_time=1.181e-04, forward_time=0.146, loss_ctc=74.657, loss_att=55.990, acc=0.719, loss=61.590, backward_time=1.027, grad_norm=113.466, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.591e-05, train_time=2.735 +[gpub002:0/64] 2023-07-12 23:00:56,830 (trainer:732) INFO: 41epoch:train:9901-10000batch: iter_time=1.108e-04, forward_time=0.146, loss_ctc=72.076, loss_att=56.216, acc=0.714, loss=60.974, backward_time=1.030, grad_norm=131.622, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.591e-05, train_time=2.725 +[gpub002:0/64] 2023-07-12 23:18:11,678 (trainer:338) INFO: 41epoch results: [train] iter_time=0.233, forward_time=0.147, loss_ctc=69.121, loss_att=53.257, acc=0.715, loss=58.016, backward_time=1.032, grad_norm=120.506, clip=100.000, loss_scale=1.781e+32, optim_step_time=0.182, optim0_lr0=5.626e-05, train_time=3.453, time=4 hours, 48 minutes and 7.1 seconds, total_count=380000, gpu_max_cached_mem_GB=37.572, [valid] loss_ctc=44.125, cer_ctc=0.260, loss_att=39.542, acc=0.666, cer=0.430, wer=0.999, loss=40.917, time=8 minutes and 29.09 seconds, total_count=38962, gpu_max_cached_mem_GB=37.572, [att_plot] time=8 minutes and 26.85 seconds, total_count=0, gpu_max_cached_mem_GB=37.572 +[gpub002:0/64] 2023-07-12 23:18:30,669 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpub002:0/64] 2023-07-12 23:18:30,800 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/36epoch.pth +[gpub002:0/64] 2023-07-12 23:18:30,800 (trainer:272) INFO: 42/50epoch started. Estimated time to finish: 1 day, 20 hours and 54 minutes +[gpub002:0/64] 2023-07-12 23:18:31,369 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
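The slowly decaying optim0_lr0 (averaging 5.626e-05 over epoch 41 in the results above) follows the warmup-then-inverse-square-root schedule named in the experiment directory (lr2.5e-4_warmup10k). A sketch of ESPnet's WarmupLR rule with those hyperparameters; note the scheduler's internal step counter need not equal the logged total_count (for example under gradient accumulation):

    def warmup_lr(step, base_lr=2.5e-4, warmup_steps=10_000):
        """WarmupLR: linear ramp to base_lr over warmup_steps, then
        inverse-square-root decay; peaks at base_lr when step == warmup_steps."""
        return base_lr * warmup_steps**0.5 * min(step**-0.5, step * warmup_steps**-1.5)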
+[gpub002:0/64] 2023-07-12 23:18:49,582 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-12 23:18:55,229 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-12 23:18:55,229 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub002:0/64] 2023-07-12 23:18:55,791 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-12 23:37:37,107 (trainer:732) INFO: 42epoch:train:1-100batch: iter_time=8.724, forward_time=1.113, loss_ctc=71.175, loss_att=49.430, acc=0.718, loss=55.953, backward_time=1.227, grad_norm=114.213, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.228, optim0_lr0=5.590e-05, train_time=22.913 +[gpub002:0/64] 2023-07-12 23:42:11,727 (trainer:732) INFO: 42epoch:train:101-200batch: iter_time=0.006, forward_time=0.982, loss_ctc=69.942, loss_att=54.374, acc=0.696, loss=59.044, backward_time=1.246, grad_norm=121.482, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.288, optim0_lr0=5.589e-05, train_time=5.491 +[gpub002:0/64] 2023-07-12 23:47:05,839 (trainer:732) INFO: 42epoch:train:201-300batch: iter_time=0.017, forward_time=1.206, loss_ctc=61.801, loss_att=45.949, acc=0.719, loss=50.705, backward_time=1.243, grad_norm=123.757, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.240, optim0_lr0=5.589e-05, train_time=5.881 +[gpub002:0/64] 2023-07-12 23:50:49,717 (trainer:732) INFO: 42epoch:train:301-400batch: iter_time=0.004, forward_time=0.743, loss_ctc=73.412, loss_att=51.589, acc=0.721, loss=58.136, backward_time=1.151, grad_norm=125.779, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.222, optim0_lr0=5.588e-05, train_time=4.480 +[gpub002:0/64] 2023-07-12 23:54:19,195 (trainer:732) INFO: 42epoch:train:401-500batch: iter_time=0.012, forward_time=0.551, loss_ctc=82.982, loss_att=60.880, acc=0.688, loss=67.511, backward_time=1.134, grad_norm=124.069, clip=100.000, loss_scale=1.314e+32, optim_step_time=0.209, optim0_lr0=5.587e-05, train_time=4.189 +[gpub002:0/64] 2023-07-12 23:57:28,757 (trainer:732) INFO: 42epoch:train:501-600batch: iter_time=0.002, forward_time=0.518, loss_ctc=78.082, loss_att=61.663, acc=0.708, loss=66.589, backward_time=1.094, grad_norm=124.871, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.201, optim0_lr0=5.586e-05, train_time=3.791 +[gpub002:0/64] 2023-07-13 00:00:25,025 (trainer:732) INFO: 42epoch:train:601-700batch: iter_time=6.864e-04, forward_time=0.420, loss_ctc=79.441, loss_att=60.935, acc=0.691, loss=66.487, backward_time=1.078, grad_norm=128.577, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.199, optim0_lr0=5.586e-05, train_time=3.526 +[gpub002:0/64] 2023-07-13 00:03:02,236 (trainer:732) INFO: 42epoch:train:701-800batch: iter_time=4.233e-04, forward_time=0.282, loss_ctc=72.996, loss_att=53.014, acc=0.712, loss=59.009, backward_time=1.066, grad_norm=122.990, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.197, optim0_lr0=5.585e-05, train_time=3.144 
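Each trainer record above is a flat list of key=value metrics, so downstream analysis of this log (learning curves, or spotting the slow first window after every iter-factory rebuild, e.g. iter_time=8.724 and train_time=22.913 at the start of epoch 42) reduces to parsing. A small, hypothetical helper for the train records only (not part of ESPnet; the epoch-results lines use a different format):

    import re

    RECORD = re.compile(r"(?P<epoch>\d+)epoch:train:(?P<span>\d+-\d+)batch: (?P<kv>.+)")

    def parse_record(line):
        """Parse '41epoch:train:9901-10000batch: iter_time=..., ...' into
        (epoch, batch_span, {metric: value})."""
        m = RECORD.search(line)
        if m is None:
            return None
        metrics = {}
        for pair in m.group("kv").split(", "):
            key, _, value = pair.partition("=")
            try:
                metrics[key] = float(value)
            except ValueError:
                metrics[key] = value
        return int(m.group("epoch")), m.group("span"), metrics

    # parse_record(line)[2]["train_time"] -> 22.913 for the epoch-42 1-100 record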
+[gpub002:0/64] 2023-07-13 00:04:00,778 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub002:0/64] 2023-07-13 00:04:18,719 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 00:04:22,403 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 00:04:22,403 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub002:0/64] 2023-07-13 00:04:22,409 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 00:09:56,081 (trainer:732) INFO: 42epoch:train:801-900batch: iter_time=2.590, forward_time=0.185, loss_ctc=73.051, loss_att=54.592, acc=0.715, loss=60.130, backward_time=1.048, grad_norm=128.053, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.185, optim0_lr0=5.584e-05, train_time=8.279 +[gpub002:0/64] 2023-07-13 00:12:13,867 (trainer:732) INFO: 42epoch:train:901-1000batch: iter_time=1.283e-04, forward_time=0.148, loss_ctc=66.524, loss_att=49.934, acc=0.708, loss=54.911, backward_time=1.035, grad_norm=121.941, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.584e-05, train_time=2.756 +[gpub002:0/64] 2023-07-13 00:14:30,359 (trainer:732) INFO: 42epoch:train:1001-1100batch: iter_time=1.139e-04, forward_time=0.147, loss_ctc=61.394, loss_att=46.990, acc=0.713, loss=51.311, backward_time=1.031, grad_norm=98.338, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.583e-05, train_time=2.730 +[gpub002:0/64] 2023-07-13 00:16:46,091 (trainer:732) INFO: 42epoch:train:1101-1200batch: iter_time=1.241e-04, forward_time=0.146, loss_ctc=68.116, loss_att=48.127, acc=0.735, loss=54.124, backward_time=1.028, grad_norm=118.499, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.582e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 00:19:03,313 (trainer:732) INFO: 42epoch:train:1201-1300batch: iter_time=1.194e-04, forward_time=0.145, loss_ctc=78.528, loss_att=57.137, acc=0.710, loss=63.554, backward_time=1.036, grad_norm=124.867, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.582e-05, train_time=2.744 +[gpub002:0/64] 2023-07-13 00:21:20,377 (trainer:732) INFO: 42epoch:train:1301-1400batch: iter_time=1.156e-04, forward_time=0.146, loss_ctc=79.593, loss_att=62.972, acc=0.704, loss=67.958, backward_time=1.031, grad_norm=134.092, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.581e-05, train_time=2.741 +[gpub002:0/64] 2023-07-13 00:23:54,097 (trainer:732) INFO: 42epoch:train:1401-1500batch: iter_time=1.091e-04, forward_time=0.145, loss_ctc=74.007, loss_att=54.221, acc=0.718, loss=60.157, backward_time=1.048, grad_norm=127.878, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.580e-05, train_time=3.074 +[gpub002:0/64] 2023-07-13 00:26:09,909 (trainer:732) INFO: 42epoch:train:1501-1600batch: iter_time=1.349e-04, forward_time=0.144, loss_ctc=75.275, loss_att=58.574, acc=0.702, loss=63.584, 
backward_time=1.029, grad_norm=121.251, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.579e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 00:27:59,966 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub002:0/64] 2023-07-13 00:28:18,102 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 00:28:22,132 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 00:28:22,141 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub002:0/64] 2023-07-13 00:28:22,317 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 00:35:20,703 (trainer:732) INFO: 42epoch:train:1601-1700batch: iter_time=4.060, forward_time=0.198, loss_ctc=75.398, loss_att=54.018, acc=0.726, loss=60.432, backward_time=1.041, grad_norm=136.654, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.185, optim0_lr0=5.579e-05, train_time=11.016 +[gpub002:0/64] 2023-07-13 00:36:32,130 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub002:0/64] 2023-07-13 00:37:37,330 (trainer:732) INFO: 42epoch:train:1701-1800batch: iter_time=1.140e-04, forward_time=0.145, loss_ctc=64.814, loss_att=48.109, acc=0.703, loss=53.121, backward_time=1.033, grad_norm=135.699, clip=100.000, loss_scale=1.225e+32, optim_step_time=0.182, optim0_lr0=5.578e-05, train_time=2.732 +[gpub002:0/64] 2023-07-13 00:39:53,639 (trainer:732) INFO: 42epoch:train:1801-1900batch: iter_time=1.067e-04, forward_time=0.145, loss_ctc=62.019, loss_att=46.732, acc=0.717, loss=51.318, backward_time=1.032, grad_norm=99.223, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.577e-05, train_time=2.726 +[gpub002:0/64] 2023-07-13 00:42:09,855 (trainer:732) INFO: 42epoch:train:1901-2000batch: iter_time=1.118e-04, forward_time=0.146, loss_ctc=68.823, loss_att=48.017, acc=0.738, loss=54.259, backward_time=1.028, grad_norm=118.420, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.577e-05, train_time=2.723 +[gpub002:0/64] 2023-07-13 00:44:25,673 (trainer:732) INFO: 42epoch:train:2001-2100batch: iter_time=1.124e-04, forward_time=0.146, loss_ctc=81.313, loss_att=58.446, acc=0.706, loss=65.306, backward_time=1.028, grad_norm=134.672, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.576e-05, train_time=2.717 +[gpub002:0/64] 2023-07-13 00:46:41,649 (trainer:732) INFO: 42epoch:train:2101-2200batch: iter_time=1.066e-04, forward_time=0.146, loss_ctc=78.344, loss_att=62.937, acc=0.702, loss=67.559, backward_time=1.030, grad_norm=151.716, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.575e-05, train_time=2.719 +[gpub002:0/64] 2023-07-13 00:48:57,917 (trainer:732) INFO: 42epoch:train:2201-2300batch: iter_time=1.105e-04, forward_time=0.145, loss_ctc=75.719, loss_att=57.616, acc=0.712, loss=63.047, backward_time=1.032, grad_norm=184.811, clip=100.000, loss_scale=8.113e+31, 
optim_step_time=0.181, optim0_lr0=5.575e-05, train_time=2.725 +[gpub002:0/64] 2023-07-13 00:51:28,156 (trainer:732) INFO: 42epoch:train:2301-2400batch: iter_time=1.113e-04, forward_time=0.147, loss_ctc=73.652, loss_att=53.750, acc=0.713, loss=59.720, backward_time=1.049, grad_norm=115.411, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.574e-05, train_time=3.005 +[gpub002:0/64] 2023-07-13 00:53:44,309 (trainer:732) INFO: 42epoch:train:2401-2500batch: iter_time=1.099e-04, forward_time=0.146, loss_ctc=70.966, loss_att=49.355, acc=0.718, loss=55.839, backward_time=1.028, grad_norm=106.942, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.573e-05, train_time=2.723 +[gpub002:0/64] 2023-07-13 00:53:46,681 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub002:0/64] 2023-07-13 00:54:04,903 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 00:54:08,345 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 00:54:08,345 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub002:0/64] 2023-07-13 00:54:08,352 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 01:01:39,737 (trainer:732) INFO: 42epoch:train:2501-2600batch: iter_time=1.820, forward_time=0.145, loss_ctc=69.815, loss_att=48.599, acc=0.726, loss=54.963, backward_time=1.041, grad_norm=140.834, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.573e-05, train_time=9.508 +[gpub002:0/64] 2023-07-13 01:03:56,212 (trainer:732) INFO: 42epoch:train:2601-2700batch: iter_time=1.162e-04, forward_time=0.147, loss_ctc=67.653, loss_att=52.431, acc=0.712, loss=56.997, backward_time=1.031, grad_norm=109.062, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.572e-05, train_time=2.729 +[gpub002:0/64] 2023-07-13 01:06:11,899 (trainer:732) INFO: 42epoch:train:2701-2800batch: iter_time=1.161e-04, forward_time=0.145, loss_ctc=59.663, loss_att=44.027, acc=0.729, loss=48.718, backward_time=1.028, grad_norm=114.622, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.571e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 01:08:27,521 (trainer:732) INFO: 42epoch:train:2801-2900batch: iter_time=1.208e-04, forward_time=0.145, loss_ctc=71.051, loss_att=47.872, acc=0.736, loss=54.826, backward_time=1.028, grad_norm=161.477, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.570e-05, train_time=2.712 +[gpub002:0/64] 2023-07-13 01:10:43,390 (trainer:732) INFO: 42epoch:train:2901-3000batch: iter_time=1.227e-04, forward_time=0.147, loss_ctc=80.680, loss_att=59.876, acc=0.698, loss=66.117, backward_time=1.029, grad_norm=125.354, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.570e-05, train_time=2.717 +[gpub002:0/64] 2023-07-13 01:12:59,481 (trainer:732) INFO: 42epoch:train:3001-3100batch: iter_time=1.471e-04, forward_time=0.146, 
loss_ctc=74.273, loss_att=57.896, acc=0.724, loss=62.809, backward_time=1.030, grad_norm=158.704, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.569e-05, train_time=2.722 +[gpub002:0/64] 2023-07-13 01:15:15,668 (trainer:732) INFO: 42epoch:train:3101-3200batch: iter_time=1.247e-04, forward_time=0.146, loss_ctc=78.031, loss_att=59.327, acc=0.704, loss=64.938, backward_time=1.030, grad_norm=153.548, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.568e-05, train_time=2.724 +[gpub002:0/64] 2023-07-13 01:17:36,473 (trainer:732) INFO: 42epoch:train:3201-3300batch: iter_time=1.264e-04, forward_time=0.145, loss_ctc=73.496, loss_att=52.683, acc=0.721, loss=58.927, backward_time=1.028, grad_norm=135.257, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.568e-05, train_time=2.816 +[gpub002:0/64] 2023-07-13 01:18:26,019 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub002:0/64] 2023-07-13 01:18:44,507 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 01:18:47,930 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 01:18:47,930 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub002:0/64] 2023-07-13 01:18:47,937 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 01:25:11,697 (trainer:732) INFO: 42epoch:train:3301-3400batch: iter_time=1.638, forward_time=0.194, loss_ctc=72.538, loss_att=53.392, acc=0.723, loss=59.136, backward_time=1.050, grad_norm=121.024, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.185, optim0_lr0=5.567e-05, train_time=9.103 +[gpub002:0/64] 2023-07-13 01:27:27,626 (trainer:732) INFO: 42epoch:train:3401-3500batch: iter_time=1.464e-04, forward_time=0.146, loss_ctc=67.316, loss_att=49.889, acc=0.703, loss=55.117, backward_time=1.029, grad_norm=129.513, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.566e-05, train_time=2.720 +[gpub002:0/64] 2023-07-13 01:29:43,626 (trainer:732) INFO: 42epoch:train:3501-3600batch: iter_time=1.305e-04, forward_time=0.146, loss_ctc=61.606, loss_att=46.689, acc=0.718, loss=51.164, backward_time=1.029, grad_norm=108.622, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.566e-05, train_time=2.720 +[gpub002:0/64] 2023-07-13 01:31:59,663 (trainer:732) INFO: 42epoch:train:3601-3700batch: iter_time=1.609e-04, forward_time=0.146, loss_ctc=67.195, loss_att=48.227, acc=0.733, loss=53.917, backward_time=1.029, grad_norm=126.501, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.565e-05, train_time=2.721 +[gpub002:0/64] 2023-07-13 01:34:15,629 (trainer:732) INFO: 42epoch:train:3701-3800batch: iter_time=1.261e-04, forward_time=0.147, loss_ctc=76.534, loss_att=56.304, acc=0.712, loss=62.373, backward_time=1.030, grad_norm=117.496, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.564e-05, train_time=2.719 
+[gpub002:0/64] 2023-07-13 01:36:31,874 (trainer:732) INFO: 42epoch:train:3801-3900batch: iter_time=1.527e-04, forward_time=0.147, loss_ctc=79.688, loss_att=61.818, acc=0.707, loss=67.179, backward_time=1.031, grad_norm=127.586, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.564e-05, train_time=2.725 +[gpub002:0/64] 2023-07-13 01:38:47,365 (trainer:732) INFO: 42epoch:train:3901-4000batch: iter_time=1.292e-04, forward_time=0.146, loss_ctc=73.126, loss_att=53.845, acc=0.710, loss=59.629, backward_time=1.026, grad_norm=113.224, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.563e-05, train_time=2.710 +[gpub002:0/64] 2023-07-13 01:41:03,491 (trainer:732) INFO: 42epoch:train:4001-4100batch: iter_time=1.410e-04, forward_time=0.147, loss_ctc=73.877, loss_att=58.523, acc=0.696, loss=63.129, backward_time=1.030, grad_norm=130.725, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.562e-05, train_time=2.722 +[gpub002:0/64] 2023-07-13 01:42:49,251 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub002:0/64] 2023-07-13 01:43:07,433 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 01:43:10,890 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 01:43:10,891 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub002:0/64] 2023-07-13 01:43:10,897 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 01:48:33,517 (trainer:732) INFO: 42epoch:train:4101-4200batch: iter_time=2.988, forward_time=0.147, loss_ctc=74.164, loss_att=53.456, acc=0.719, loss=59.668, backward_time=1.039, grad_norm=115.908, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.561e-05, train_time=9.000 +[gpub002:0/64] 2023-07-13 01:50:50,772 (trainer:732) INFO: 42epoch:train:4201-4300batch: iter_time=1.188e-04, forward_time=0.146, loss_ctc=64.361, loss_att=46.601, acc=0.711, loss=51.929, backward_time=1.033, grad_norm=103.878, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.561e-05, train_time=2.745 +[gpub002:0/64] 2023-07-13 01:53:07,508 (trainer:732) INFO: 42epoch:train:4301-4400batch: iter_time=1.087e-04, forward_time=0.147, loss_ctc=61.857, loss_att=46.652, acc=0.722, loss=51.213, backward_time=1.030, grad_norm=101.562, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.560e-05, train_time=2.735 +[gpub002:0/64] 2023-07-13 01:55:23,236 (trainer:732) INFO: 42epoch:train:4401-4500batch: iter_time=1.127e-04, forward_time=0.145, loss_ctc=69.241, loss_att=48.461, acc=0.741, loss=54.695, backward_time=1.028, grad_norm=128.802, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.559e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 01:57:42,177 (trainer:732) INFO: 42epoch:train:4501-4600batch: iter_time=1.137e-04, forward_time=0.146, loss_ctc=80.204, loss_att=58.270, acc=0.708, loss=64.850, 
backward_time=1.034, grad_norm=105.180, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.559e-05, train_time=2.779 +[gpub002:0/64] 2023-07-13 01:59:58,237 (trainer:732) INFO: 42epoch:train:4601-4700batch: iter_time=1.188e-04, forward_time=0.146, loss_ctc=76.178, loss_att=61.031, acc=0.708, loss=65.575, backward_time=1.030, grad_norm=119.146, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.558e-05, train_time=2.721 +[gpub002:0/64] 2023-07-13 02:02:14,580 (trainer:732) INFO: 42epoch:train:4701-4800batch: iter_time=1.163e-04, forward_time=0.147, loss_ctc=75.844, loss_att=58.430, acc=0.715, loss=63.654, backward_time=1.034, grad_norm=126.411, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.557e-05, train_time=2.727 +[gpub002:0/64] 2023-07-13 02:04:30,380 (trainer:732) INFO: 42epoch:train:4801-4900batch: iter_time=1.091e-04, forward_time=0.146, loss_ctc=73.955, loss_att=54.425, acc=0.710, loss=60.284, backward_time=1.028, grad_norm=110.702, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.557e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 02:06:46,081 (trainer:732) INFO: 42epoch:train:4901-5000batch: iter_time=1.107e-04, forward_time=0.146, loss_ctc=71.515, loss_att=48.756, acc=0.724, loss=55.584, backward_time=1.029, grad_norm=124.882, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.556e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 02:06:47,710 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub002:0/64] 2023-07-13 02:07:06,009 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 02:07:09,425 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 02:07:09,425 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub002:0/64] 2023-07-13 02:07:09,432 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 02:14:16,140 (trainer:732) INFO: 42epoch:train:5001-5100batch: iter_time=1.636, forward_time=0.145, loss_ctc=69.401, loss_att=50.532, acc=0.709, loss=56.193, backward_time=1.040, grad_norm=142.315, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.555e-05, train_time=9.001 +[gpub002:0/64] 2023-07-13 02:16:32,200 (trainer:732) INFO: 42epoch:train:5101-5200batch: iter_time=1.226e-04, forward_time=0.145, loss_ctc=65.757, loss_att=49.932, acc=0.709, loss=54.679, backward_time=1.028, grad_norm=114.490, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.555e-05, train_time=2.721 +[gpub002:0/64] 2023-07-13 02:18:52,290 (trainer:732) INFO: 42epoch:train:5201-5300batch: iter_time=1.255e-04, forward_time=0.144, loss_ctc=64.010, loss_att=44.270, acc=0.738, loss=50.192, backward_time=1.032, grad_norm=105.407, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.554e-05, train_time=2.802 +[gpub002:0/64] 2023-07-13 02:21:08,766 (trainer:732) INFO: 
42epoch:train:5301-5400batch: iter_time=1.153e-04, forward_time=0.147, loss_ctc=70.481, loss_att=53.258, acc=0.712, loss=58.425, backward_time=1.033, grad_norm=117.124, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.553e-05, train_time=2.729 +[gpub002:0/64] 2023-07-13 02:23:24,679 (trainer:732) INFO: 42epoch:train:5401-5500batch: iter_time=1.204e-04, forward_time=0.146, loss_ctc=79.697, loss_att=60.747, acc=0.706, loss=66.432, backward_time=1.030, grad_norm=129.480, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.553e-05, train_time=2.718 +[gpub002:0/64] 2023-07-13 02:25:41,523 (trainer:732) INFO: 42epoch:train:5501-5600batch: iter_time=1.228e-04, forward_time=0.149, loss_ctc=72.910, loss_att=55.688, acc=0.712, loss=60.854, backward_time=1.030, grad_norm=149.499, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.186, optim0_lr0=5.552e-05, train_time=2.737 +[gpub002:0/64] 2023-07-13 02:27:57,260 (trainer:732) INFO: 42epoch:train:5601-5700batch: iter_time=1.284e-04, forward_time=0.145, loss_ctc=77.018, loss_att=59.105, acc=0.700, loss=64.479, backward_time=1.028, grad_norm=131.998, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.551e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 02:30:13,284 (trainer:732) INFO: 42epoch:train:5701-5800batch: iter_time=1.144e-04, forward_time=0.145, loss_ctc=73.572, loss_att=53.577, acc=0.722, loss=59.576, backward_time=1.030, grad_norm=103.209, clip=100.000, loss_scale=1.201e+32, optim_step_time=0.182, optim0_lr0=5.551e-05, train_time=2.720 +[gpub002:0/64] 2023-07-13 02:31:01,533 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub002:0/64] 2023-07-13 02:31:19,805 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 02:31:23,527 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 02:31:23,527 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub002:0/64] 2023-07-13 02:31:23,533 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 02:38:09,529 (trainer:732) INFO: 42epoch:train:5801-5900batch: iter_time=1.672, forward_time=0.223, loss_ctc=66.753, loss_att=45.931, acc=0.710, loss=52.177, backward_time=1.044, grad_norm=115.552, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.185, optim0_lr0=5.550e-05, train_time=9.524 +[gpub002:0/64] 2023-07-13 02:40:27,216 (trainer:732) INFO: 42epoch:train:5901-6000batch: iter_time=1.296e-04, forward_time=0.146, loss_ctc=66.955, loss_att=51.180, acc=0.714, loss=55.913, backward_time=1.032, grad_norm=103.524, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.549e-05, train_time=2.754 +[gpub002:0/64] 2023-07-13 02:42:42,739 (trainer:732) INFO: 42epoch:train:6001-6100batch: iter_time=1.289e-04, forward_time=0.144, loss_ctc=62.586, loss_att=45.365, acc=0.728, loss=50.531, backward_time=1.026, grad_norm=94.871, clip=100.000, 
loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.548e-05, train_time=2.710 +[gpub002:0/64] 2023-07-13 02:44:58,673 (trainer:732) INFO: 42epoch:train:6101-6200batch: iter_time=1.208e-04, forward_time=0.145, loss_ctc=70.502, loss_att=52.029, acc=0.726, loss=57.571, backward_time=1.029, grad_norm=120.129, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.548e-05, train_time=2.718 +[gpub002:0/64] 2023-07-13 02:47:14,390 (trainer:732) INFO: 42epoch:train:6201-6300batch: iter_time=1.266e-04, forward_time=0.144, loss_ctc=80.294, loss_att=60.866, acc=0.691, loss=66.694, backward_time=1.027, grad_norm=141.400, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.547e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 02:49:29,945 (trainer:732) INFO: 42epoch:train:6301-6400batch: iter_time=1.154e-04, forward_time=0.144, loss_ctc=74.026, loss_att=55.778, acc=0.718, loss=61.252, backward_time=1.026, grad_norm=131.830, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.546e-05, train_time=2.711 +[gpub002:0/64] 2023-07-13 02:51:45,921 (trainer:732) INFO: 42epoch:train:6401-6500batch: iter_time=1.322e-04, forward_time=0.145, loss_ctc=77.487, loss_att=57.948, acc=0.707, loss=63.810, backward_time=1.028, grad_norm=135.680, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.546e-05, train_time=2.719 +[gpub002:0/64] 2023-07-13 02:54:01,435 (trainer:732) INFO: 42epoch:train:6501-6600batch: iter_time=1.228e-04, forward_time=0.144, loss_ctc=69.023, loss_att=50.317, acc=0.715, loss=55.929, backward_time=1.026, grad_norm=121.503, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.545e-05, train_time=2.710 +[gpub002:0/64] 2023-07-13 02:55:35,752 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub002:0/64] 2023-07-13 02:55:53,825 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 02:55:57,465 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 02:55:57,465 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub002:0/64] 2023-07-13 02:55:57,472 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 03:02:27,111 (trainer:732) INFO: 42epoch:train:6601-6700batch: iter_time=3.598, forward_time=0.208, loss_ctc=68.057, loss_att=47.036, acc=0.718, loss=53.342, backward_time=1.042, grad_norm=118.857, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=5.544e-05, train_time=10.113 +[gpub002:0/64] 2023-07-13 03:04:44,821 (trainer:732) INFO: 42epoch:train:6701-6800batch: iter_time=1.149e-04, forward_time=0.148, loss_ctc=68.438, loss_att=52.085, acc=0.723, loss=56.991, backward_time=1.034, grad_norm=140.459, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.544e-05, train_time=2.754 +[gpub002:0/64] 2023-07-13 03:07:01,317 (trainer:732) INFO: 42epoch:train:6801-6900batch: iter_time=1.279e-04, forward_time=0.145, loss_ctc=64.735, loss_att=49.425, acc=0.714, loss=54.018, backward_time=1.027, grad_norm=114.945, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.543e-05, train_time=2.730 +[gpub002:0/64] 2023-07-13 03:09:19,001 (trainer:732) INFO: 42epoch:train:6901-7000batch: iter_time=0.004, forward_time=0.147, loss_ctc=64.172, loss_att=44.513, acc=0.739, loss=50.411, backward_time=1.035, grad_norm=108.401, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.542e-05, train_time=2.753 +[gpub002:0/64] 2023-07-13 03:11:34,913 (trainer:732) INFO: 42epoch:train:7001-7100batch: iter_time=0.002, forward_time=0.145, loss_ctc=70.024, loss_att=52.180, acc=0.717, loss=57.534, backward_time=1.028, grad_norm=133.620, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.542e-05, train_time=2.718 +[gpub002:0/64] 2023-07-13 03:14:01,730 (trainer:732) INFO: 42epoch:train:7101-7200batch: iter_time=1.283e-04, forward_time=0.217, loss_ctc=80.096, loss_att=59.082, acc=0.709, loss=65.386, backward_time=1.048, grad_norm=121.033, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.188, optim0_lr0=5.541e-05, train_time=2.936 +[gpub002:0/64] 2023-07-13 03:16:18,403 (trainer:732) INFO: 42epoch:train:7201-7300batch: iter_time=1.249e-04, forward_time=0.146, loss_ctc=72.737, loss_att=55.530, acc=0.720, loss=60.692, backward_time=1.031, grad_norm=126.104, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.540e-05, train_time=2.733 +[gpub002:0/64] 2023-07-13 03:18:34,604 (trainer:732) INFO: 42epoch:train:7301-7400batch: iter_time=1.150e-04, forward_time=0.146, loss_ctc=76.887, loss_att=59.261, acc=0.713, loss=64.549, backward_time=1.028, grad_norm=153.123, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.540e-05, 
train_time=2.724 +[gpub002:0/64] 2023-07-13 03:20:50,723 (trainer:732) INFO: 42epoch:train:7401-7500batch: iter_time=1.391e-04, forward_time=0.147, loss_ctc=74.305, loss_att=53.789, acc=0.728, loss=59.944, backward_time=1.028, grad_norm=120.151, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.539e-05, train_time=2.722 +[gpub002:0/64] 2023-07-13 03:21:11,975 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpub002:0/64] 2023-07-13 03:21:30,453 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 03:21:33,941 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 03:21:33,941 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub002:0/64] 2023-07-13 03:21:33,947 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 03:28:50,459 (trainer:732) INFO: 42epoch:train:7501-7600batch: iter_time=3.260, forward_time=0.146, loss_ctc=68.511, loss_att=47.602, acc=0.728, loss=53.875, backward_time=1.046, grad_norm=119.300, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.538e-05, train_time=9.595 +[gpub002:0/64] 2023-07-13 03:31:08,080 (trainer:732) INFO: 42epoch:train:7601-7700batch: iter_time=1.319e-04, forward_time=0.145, loss_ctc=65.544, loss_att=50.965, acc=0.711, loss=55.339, backward_time=1.032, grad_norm=117.013, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.538e-05, train_time=2.752 +[gpub002:0/64] 2023-07-13 03:33:24,531 (trainer:732) INFO: 42epoch:train:7701-7800batch: iter_time=1.504e-04, forward_time=0.147, loss_ctc=59.954, loss_att=44.149, acc=0.726, loss=48.890, backward_time=1.030, grad_norm=111.944, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.537e-05, train_time=2.729 +[gpub002:0/64] 2023-07-13 03:35:39,885 (trainer:732) INFO: 42epoch:train:7801-7900batch: iter_time=1.352e-04, forward_time=0.144, loss_ctc=71.306, loss_att=50.202, acc=0.729, loss=56.534, backward_time=1.025, grad_norm=132.190, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.536e-05, train_time=2.707 +[gpub002:0/64] 2023-07-13 03:38:11,937 (trainer:732) INFO: 42epoch:train:7901-8000batch: iter_time=1.097e-04, forward_time=0.144, loss_ctc=81.280, loss_att=59.731, acc=0.695, loss=66.195, backward_time=1.044, grad_norm=116.322, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.536e-05, train_time=3.041 +[gpub002:0/64] 2023-07-13 03:40:27,681 (trainer:732) INFO: 42epoch:train:8001-8100batch: iter_time=1.301e-04, forward_time=0.144, loss_ctc=73.657, loss_att=57.378, acc=0.722, loss=62.262, backward_time=1.028, grad_norm=119.137, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.535e-05, train_time=2.715 +[gpub002:0/64] 2023-07-13 03:42:43,340 (trainer:732) INFO: 42epoch:train:8101-8200batch: iter_time=1.130e-04, forward_time=0.145, loss_ctc=77.258, loss_att=59.529, acc=0.697, 
loss=64.848, backward_time=1.029, grad_norm=124.385, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.534e-05, train_time=2.713 +[gpub002:0/64] 2023-07-13 03:44:58,939 (trainer:732) INFO: 42epoch:train:8201-8300batch: iter_time=1.061e-04, forward_time=0.145, loss_ctc=70.039, loss_att=50.529, acc=0.722, loss=56.382, backward_time=1.028, grad_norm=114.947, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.533e-05, train_time=2.712 +[gpub002:0/64] 2023-07-13 03:45:46,940 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub002:0/64] 2023-07-13 03:46:05,424 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 03:46:08,900 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 03:46:08,900 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub002:0/64] 2023-07-13 03:46:08,906 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 03:53:07,279 (trainer:732) INFO: 42epoch:train:8301-8400batch: iter_time=1.680, forward_time=0.177, loss_ctc=70.901, loss_att=52.528, acc=0.721, loss=58.040, backward_time=1.041, grad_norm=127.142, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.533e-05, train_time=9.767 +[gpub002:0/64] 2023-07-13 03:55:25,674 (trainer:732) INFO: 42epoch:train:8401-8500batch: iter_time=1.138e-04, forward_time=0.145, loss_ctc=63.924, loss_att=47.108, acc=0.710, loss=52.153, backward_time=1.037, grad_norm=118.179, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.532e-05, train_time=2.768 +[gpub002:0/64] 2023-07-13 03:57:42,100 (trainer:732) INFO: 42epoch:train:8501-8600batch: iter_time=1.247e-04, forward_time=0.145, loss_ctc=63.131, loss_att=46.587, acc=0.723, loss=51.551, backward_time=1.029, grad_norm=92.654, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.531e-05, train_time=2.728 +[gpub002:0/64] 2023-07-13 04:00:13,714 (trainer:732) INFO: 42epoch:train:8601-8700batch: iter_time=1.103e-04, forward_time=0.144, loss_ctc=68.642, loss_att=48.513, acc=0.737, loss=54.552, backward_time=1.043, grad_norm=114.432, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.531e-05, train_time=3.032 +[gpub002:0/64] 2023-07-13 04:02:29,944 (trainer:732) INFO: 42epoch:train:8701-8800batch: iter_time=1.279e-04, forward_time=0.144, loss_ctc=75.843, loss_att=56.720, acc=0.713, loss=62.457, backward_time=1.028, grad_norm=108.012, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.530e-05, train_time=2.724 +[gpub002:0/64] 2023-07-13 04:04:45,889 (trainer:732) INFO: 42epoch:train:8801-8900batch: iter_time=1.261e-04, forward_time=0.145, loss_ctc=77.727, loss_att=61.456, acc=0.711, loss=66.338, backward_time=1.029, grad_norm=118.766, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.529e-05, train_time=2.719 +[gpub002:0/64] 2023-07-13 04:07:01,510 
(trainer:732) INFO: 42epoch:train:8901-9000batch: iter_time=1.245e-04, forward_time=0.144, loss_ctc=72.948, loss_att=53.187, acc=0.715, loss=59.115, backward_time=1.027, grad_norm=137.792, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.529e-05, train_time=2.712 +[gpub002:0/64] 2023-07-13 04:09:17,733 (trainer:732) INFO: 42epoch:train:9001-9100batch: iter_time=1.255e-04, forward_time=0.145, loss_ctc=75.533, loss_att=57.845, acc=0.699, loss=63.151, backward_time=1.031, grad_norm=119.867, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.528e-05, train_time=2.725 +[gpub002:0/64] 2023-07-13 04:10:50,524 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub002:0/64] 2023-07-13 04:11:08,648 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 04:11:12,048 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 04:11:12,048 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub002:0/64] 2023-07-13 04:11:12,055 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 04:16:46,199 (trainer:732) INFO: 42epoch:train:9101-9200batch: iter_time=1.637, forward_time=0.145, loss_ctc=71.833, loss_att=48.627, acc=0.732, loss=55.589, backward_time=1.042, grad_norm=104.157, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.527e-05, train_time=8.969 +[gpub002:0/64] 2023-07-13 04:19:03,487 (trainer:732) INFO: 42epoch:train:9201-9300batch: iter_time=1.282e-04, forward_time=0.146, loss_ctc=67.367, loss_att=51.468, acc=0.722, loss=56.238, backward_time=1.032, grad_norm=120.743, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.527e-05, train_time=2.746 +[gpub002:0/64] 2023-07-13 04:21:20,258 (trainer:732) INFO: 42epoch:train:9301-9400batch: iter_time=1.226e-04, forward_time=0.145, loss_ctc=65.408, loss_att=50.219, acc=0.717, loss=54.776, backward_time=1.030, grad_norm=110.375, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.526e-05, train_time=2.735 +[gpub002:0/64] 2023-07-13 04:23:36,660 (trainer:732) INFO: 42epoch:train:9401-9500batch: iter_time=1.233e-04, forward_time=0.144, loss_ctc=63.769, loss_att=43.754, acc=0.739, loss=49.759, backward_time=1.025, grad_norm=107.213, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.525e-05, train_time=2.728 +[gpub002:0/64] 2023-07-13 04:25:52,274 (trainer:732) INFO: 42epoch:train:9501-9600batch: iter_time=1.125e-04, forward_time=0.144, loss_ctc=70.289, loss_att=52.121, acc=0.718, loss=57.571, backward_time=1.025, grad_norm=122.835, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.525e-05, train_time=2.712 +[gpub002:0/64] 2023-07-13 04:28:08,506 (trainer:732) INFO: 42epoch:train:9601-9700batch: iter_time=1.172e-04, forward_time=0.145, loss_ctc=81.429, loss_att=62.500, acc=0.706, loss=68.178, backward_time=1.028, grad_norm=125.597, 
clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.524e-05, train_time=2.724 +[gpub002:0/64] 2023-07-13 04:30:24,543 (trainer:732) INFO: 42epoch:train:9701-9800batch: iter_time=1.224e-04, forward_time=0.145, loss_ctc=71.177, loss_att=53.266, acc=0.725, loss=58.639, backward_time=1.027, grad_norm=126.374, clip=100.000, loss_scale=2.401e+32, optim_step_time=0.181, optim0_lr0=5.523e-05, train_time=2.721 +[gpub002:0/64] 2023-07-13 04:32:40,550 (trainer:732) INFO: 42epoch:train:9801-9900batch: iter_time=1.218e-04, forward_time=0.145, loss_ctc=75.760, loss_att=57.853, acc=0.719, loss=63.225, backward_time=1.030, grad_norm=129.317, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.523e-05, train_time=2.720 +[gpub002:0/64] 2023-07-13 04:34:56,397 (trainer:732) INFO: 42epoch:train:9901-10000batch: iter_time=1.167e-04, forward_time=0.146, loss_ctc=73.059, loss_att=52.842, acc=0.731, loss=58.907, backward_time=1.028, grad_norm=109.600, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.522e-05, train_time=2.717 +[gpub002:0/64] 2023-07-13 04:50:54,355 (trainer:338) INFO: 42epoch results: [train] iter_time=0.354, forward_time=0.196, loss_ctc=71.674, loss_att=53.129, acc=0.716, loss=58.693, backward_time=1.042, grad_norm=122.970, clip=100.000, loss_scale=1.303e+32, optim_step_time=0.185, optim0_lr0=5.556e-05, train_time=3.797, time=5 hours, 16 minutes and 39.53 seconds, total_count=390000, gpu_max_cached_mem_GB=37.572, [valid] loss_ctc=44.090, cer_ctc=0.260, loss_att=36.484, acc=0.697, cer=0.342, wer=0.989, loss=38.766, time=6 minutes and 47.34 seconds, total_count=39974, gpu_max_cached_mem_GB=37.572, [att_plot] time=8 minutes and 56.53 seconds, total_count=0, gpu_max_cached_mem_GB=37.572 +[gpub002:0/64] 2023-07-13 04:51:13,901 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub002:0/64] 2023-07-13 04:51:13,945 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/31epoch.pth +[gpub002:0/64] 2023-07-13 04:51:14,020 (trainer:272) INFO: 43/50epoch started. Estimated time to finish: 1 day, 17 hours and 23 minutes +[gpub002:0/64] 2023-07-13 04:51:15,288 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub002:0/64] 2023-07-13 04:51:35,527 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 04:51:39,096 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 04:51:39,097 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub002:0/64] 2023-07-13 04:51:39,159 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 04:58:53,737 (trainer:732) INFO: 43epoch:train:1-100batch: iter_time=3.165, forward_time=0.177, loss_ctc=67.171, loss_att=52.136, acc=0.696, loss=56.646, backward_time=1.043, grad_norm=125.404, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.521e-05, train_time=9.181 +[gpub002:0/64] 2023-07-13 05:01:09,947 (trainer:732) INFO: 43epoch:train:101-200batch: iter_time=1.246e-04, forward_time=0.146, loss_ctc=74.299, loss_att=53.346, acc=0.702, loss=59.632, backward_time=1.031, grad_norm=143.592, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.521e-05, train_time=2.724 +[gpub002:0/64] 2023-07-13 05:03:26,486 (trainer:732) INFO: 43epoch:train:201-300batch: iter_time=1.129e-04, forward_time=0.146, loss_ctc=72.274, loss_att=51.180, acc=0.712, loss=57.508, backward_time=1.030, grad_norm=127.012, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.520e-05, train_time=2.731 +[gpub002:0/64] 2023-07-13 05:05:44,653 (trainer:732) INFO: 43epoch:train:301-400batch: iter_time=1.104e-04, forward_time=0.149, loss_ctc=74.766, loss_att=54.066, acc=0.696, loss=60.276, backward_time=1.031, grad_norm=130.116, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.519e-05, train_time=2.763 +[gpub002:0/64] 2023-07-13 05:08:01,530 (trainer:732) INFO: 43epoch:train:401-500batch: iter_time=1.201e-04, forward_time=0.146, loss_ctc=69.776, loss_att=53.497, acc=0.706, loss=58.381, backward_time=1.029, grad_norm=113.600, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.519e-05, train_time=2.737 +[gpub002:0/64] 2023-07-13 05:10:24,010 (trainer:732) INFO: 43epoch:train:501-600batch: iter_time=3.155e-04, forward_time=0.146, loss_ctc=78.761, loss_att=55.515, acc=0.699, loss=62.489, backward_time=1.032, grad_norm=125.079, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.518e-05, train_time=2.849 +[gpub002:0/64] 2023-07-13 05:12:54,234 (trainer:732) INFO: 43epoch:train:601-700batch: iter_time=1.062e-04, forward_time=0.144, loss_ctc=71.370, loss_att=43.750, acc=0.721, loss=52.036, backward_time=1.053, grad_norm=126.006, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.517e-05, train_time=3.004 +[gpub002:0/64] 2023-07-13 05:15:26,303 (trainer:732) INFO: 43epoch:train:701-800batch: iter_time=5.144e-04, forward_time=0.179, loss_ctc=66.772, loss_att=49.511, acc=0.712, loss=54.690, backward_time=1.041, grad_norm=121.595, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.517e-05, 
train_time=3.041 +[gpub002:0/64] 2023-07-13 05:16:19,570 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub002:0/64] 2023-07-13 05:16:37,303 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 05:16:40,677 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 05:16:40,677 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub002:0/64] 2023-07-13 05:16:40,684 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 05:23:58,859 (trainer:732) INFO: 43epoch:train:801-900batch: iter_time=1.808, forward_time=0.186, loss_ctc=68.942, loss_att=51.380, acc=0.705, loss=56.649, backward_time=1.041, grad_norm=122.084, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.516e-05, train_time=10.251 +[gpub002:0/64] 2023-07-13 05:26:15,903 (trainer:732) INFO: 43epoch:train:901-1000batch: iter_time=1.277e-04, forward_time=0.148, loss_ctc=67.154, loss_att=51.145, acc=0.718, loss=55.947, backward_time=1.034, grad_norm=100.494, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.515e-05, train_time=2.741 +[gpub002:0/64] 2023-07-13 05:28:31,874 (trainer:732) INFO: 43epoch:train:1001-1100batch: iter_time=1.304e-04, forward_time=0.145, loss_ctc=74.706, loss_att=53.692, acc=0.714, loss=59.997, backward_time=1.028, grad_norm=134.309, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.515e-05, train_time=2.719 +[gpub002:0/64] 2023-07-13 05:30:50,785 (trainer:732) INFO: 43epoch:train:1101-1200batch: iter_time=1.143e-04, forward_time=0.145, loss_ctc=70.458, loss_att=49.738, acc=0.704, loss=55.954, backward_time=1.031, grad_norm=126.190, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.514e-05, train_time=2.778 +[gpub002:0/64] 2023-07-13 05:33:07,145 (trainer:732) INFO: 43epoch:train:1201-1300batch: iter_time=1.270e-04, forward_time=0.146, loss_ctc=76.640, loss_att=58.441, acc=0.723, loss=63.900, backward_time=1.030, grad_norm=120.803, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.513e-05, train_time=2.727 +[gpub002:0/64] 2023-07-13 05:35:42,951 (trainer:732) INFO: 43epoch:train:1301-1400batch: iter_time=0.003, forward_time=0.281, loss_ctc=63.792, loss_att=48.848, acc=0.704, loss=53.331, backward_time=1.064, grad_norm=120.871, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.193, optim0_lr0=5.513e-05, train_time=3.115 +[gpub002:0/64] 2023-07-13 05:37:59,663 (trainer:732) INFO: 43epoch:train:1401-1500batch: iter_time=1.387e-04, forward_time=0.147, loss_ctc=73.107, loss_att=46.764, acc=0.723, loss=54.667, backward_time=1.030, grad_norm=135.144, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.512e-05, train_time=2.735 +[gpub002:0/64] 2023-07-13 05:40:17,904 (trainer:732) INFO: 43epoch:train:1501-1600batch: iter_time=1.201e-04, forward_time=0.147, loss_ctc=66.824, loss_att=47.833, acc=0.731, loss=53.530, 
backward_time=1.030, grad_norm=106.942, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.511e-05, train_time=2.765 +[gpub002:0/64] 2023-07-13 05:41:55,175 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub002:0/64] 2023-07-13 05:42:13,121 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 05:42:16,546 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 05:42:16,546 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub002:0/64] 2023-07-13 05:42:16,552 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 05:46:42,778 (trainer:732) INFO: 43epoch:train:1601-1700batch: iter_time=2.035, forward_time=0.145, loss_ctc=64.829, loss_att=45.468, acc=0.725, loss=51.276, backward_time=1.045, grad_norm=109.907, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.511e-05, train_time=7.697 +[gpub002:0/64] 2023-07-13 05:49:04,557 (trainer:732) INFO: 43epoch:train:1701-1800batch: iter_time=1.137e-04, forward_time=0.146, loss_ctc=68.037, loss_att=51.317, acc=0.717, loss=56.333, backward_time=1.039, grad_norm=110.998, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.510e-05, train_time=2.835 +[gpub002:0/64] 2023-07-13 05:51:20,869 (trainer:732) INFO: 43epoch:train:1801-1900batch: iter_time=1.132e-04, forward_time=0.147, loss_ctc=78.940, loss_att=57.649, acc=0.709, loss=64.036, backward_time=1.028, grad_norm=127.683, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.509e-05, train_time=2.726 +[gpub002:0/64] 2023-07-13 05:53:36,717 (trainer:732) INFO: 43epoch:train:1901-2000batch: iter_time=1.120e-04, forward_time=0.146, loss_ctc=62.792, loss_att=44.461, acc=0.716, loss=49.960, backward_time=1.025, grad_norm=98.542, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.509e-05, train_time=2.717 +[gpub002:0/64] 2023-07-13 05:55:52,706 (trainer:732) INFO: 43epoch:train:2001-2100batch: iter_time=1.126e-04, forward_time=0.146, loss_ctc=81.040, loss_att=61.512, acc=0.713, loss=67.371, backward_time=1.029, grad_norm=130.485, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.508e-05, train_time=2.720 +[gpub002:0/64] 2023-07-13 05:58:11,248 (trainer:732) INFO: 43epoch:train:2101-2200batch: iter_time=1.188e-04, forward_time=0.146, loss_ctc=64.820, loss_att=50.416, acc=0.711, loss=54.738, backward_time=1.030, grad_norm=111.591, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.507e-05, train_time=2.771 +[gpub002:0/64] 2023-07-13 06:00:26,904 (trainer:732) INFO: 43epoch:train:2201-2300batch: iter_time=1.170e-04, forward_time=0.147, loss_ctc=69.248, loss_att=47.166, acc=0.714, loss=53.791, backward_time=1.026, grad_norm=117.367, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.507e-05, train_time=2.713 +[gpub002:0/64] 2023-07-13 06:02:46,061 (trainer:732) INFO: 
43epoch:train:2301-2400batch: iter_time=1.175e-04, forward_time=0.146, loss_ctc=65.678, loss_att=43.020, acc=0.730, loss=49.818, backward_time=1.039, grad_norm=117.099, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.506e-05, train_time=2.783 +[gpub002:0/64] 2023-07-13 06:05:02,779 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub002:0/64] 2023-07-13 06:05:21,017 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 06:05:24,530 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 06:05:24,530 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub002:0/64] 2023-07-13 06:05:24,536 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 06:11:05,880 (trainer:732) INFO: 43epoch:train:2401-2500batch: iter_time=1.277, forward_time=0.147, loss_ctc=70.028, loss_att=50.589, acc=0.734, loss=56.420, backward_time=1.041, grad_norm=117.428, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.505e-05, train_time=9.996 +[gpub002:0/64] 2023-07-13 06:13:45,256 (trainer:732) INFO: 43epoch:train:2501-2600batch: iter_time=1.182e-04, forward_time=0.146, loss_ctc=65.700, loss_att=49.932, acc=0.707, loss=54.662, backward_time=1.045, grad_norm=108.564, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.505e-05, train_time=3.187 +[gpub002:0/64] 2023-07-13 06:16:01,175 (trainer:732) INFO: 43epoch:train:2601-2700batch: iter_time=1.210e-04, forward_time=0.145, loss_ctc=69.006, loss_att=51.406, acc=0.708, loss=56.686, backward_time=1.029, grad_norm=117.685, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.504e-05, train_time=2.718 +[gpub002:0/64] 2023-07-13 06:18:16,928 (trainer:732) INFO: 43epoch:train:2701-2800batch: iter_time=1.219e-04, forward_time=0.145, loss_ctc=68.756, loss_att=49.034, acc=0.720, loss=54.951, backward_time=1.026, grad_norm=113.801, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.503e-05, train_time=2.715 +[gpub002:0/64] 2023-07-13 06:20:32,341 (trainer:732) INFO: 43epoch:train:2801-2900batch: iter_time=1.224e-04, forward_time=0.145, loss_ctc=74.163, loss_att=54.309, acc=0.702, loss=60.265, backward_time=1.024, grad_norm=120.888, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.503e-05, train_time=2.708 +[gpub002:0/64] 2023-07-13 06:22:47,910 (trainer:732) INFO: 43epoch:train:2901-3000batch: iter_time=1.300e-04, forward_time=0.145, loss_ctc=68.902, loss_att=52.706, acc=0.711, loss=57.565, backward_time=1.027, grad_norm=117.962, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.502e-05, train_time=2.711 +[gpub002:0/64] 2023-07-13 06:25:04,075 (trainer:732) INFO: 43epoch:train:3001-3100batch: iter_time=1.264e-04, forward_time=0.148, loss_ctc=72.330, loss_att=53.634, acc=0.706, loss=59.243, backward_time=1.029, grad_norm=126.462, clip=100.000, 
loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.501e-05, train_time=2.723 +[gpub002:0/64] 2023-07-13 06:27:19,562 (trainer:732) INFO: 43epoch:train:3101-3200batch: iter_time=1.278e-04, forward_time=0.146, loss_ctc=64.870, loss_att=42.763, acc=0.727, loss=49.396, backward_time=1.026, grad_norm=109.345, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.501e-05, train_time=2.710 +[gpub002:0/64] 2023-07-13 06:29:35,085 (trainer:732) INFO: 43epoch:train:3201-3300batch: iter_time=1.258e-04, forward_time=0.146, loss_ctc=66.320, loss_att=48.097, acc=0.724, loss=53.564, backward_time=1.026, grad_norm=116.268, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.500e-05, train_time=2.710 +[gpub002:0/64] 2023-07-13 06:30:19,781 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub002:0/64] 2023-07-13 06:30:37,996 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 06:30:41,419 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 06:30:41,419 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub002:0/64] 2023-07-13 06:30:41,425 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 06:36:35,849 (trainer:732) INFO: 43epoch:train:3301-3400batch: iter_time=1.288, forward_time=0.145, loss_ctc=70.209, loss_att=56.960, acc=0.702, loss=60.934, backward_time=1.040, grad_norm=127.952, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.499e-05, train_time=8.415 +[gpub002:0/64] 2023-07-13 06:38:52,517 (trainer:732) INFO: 43epoch:train:3401-3500batch: iter_time=1.077e-04, forward_time=0.147, loss_ctc=70.143, loss_att=50.603, acc=0.720, loss=56.465, backward_time=1.032, grad_norm=141.257, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.499e-05, train_time=2.733 +[gpub002:0/64] 2023-07-13 06:41:08,598 (trainer:732) INFO: 43epoch:train:3501-3600batch: iter_time=1.134e-04, forward_time=0.146, loss_ctc=65.845, loss_att=47.427, acc=0.720, loss=52.953, backward_time=1.031, grad_norm=125.431, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.498e-05, train_time=2.721 +[gpub002:0/64] 2023-07-13 06:43:24,520 (trainer:732) INFO: 43epoch:train:3601-3700batch: iter_time=1.134e-04, forward_time=0.146, loss_ctc=77.247, loss_att=55.054, acc=0.710, loss=61.712, backward_time=1.028, grad_norm=118.561, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.497e-05, train_time=2.718 +[gpub002:0/64] 2023-07-13 06:45:40,581 (trainer:732) INFO: 43epoch:train:3701-3800batch: iter_time=1.182e-04, forward_time=0.147, loss_ctc=65.852, loss_att=50.169, acc=0.722, loss=54.874, backward_time=1.031, grad_norm=106.865, clip=100.000, loss_scale=4.803e+32, optim_step_time=0.182, optim0_lr0=5.497e-05, train_time=2.721 +[gpub002:0/64] 2023-07-13 06:47:59,361 (trainer:732) INFO: 43epoch:train:3801-3900batch: iter_time=1.074e-04, 
forward_time=0.148, loss_ctc=72.644, loss_att=54.032, acc=0.710, loss=59.615, backward_time=1.031, grad_norm=136.386, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.496e-05, train_time=2.775 +[gpub002:0/64] 2023-07-13 06:50:15,237 (trainer:732) INFO: 43epoch:train:3901-4000batch: iter_time=1.063e-04, forward_time=0.147, loss_ctc=64.068, loss_att=40.961, acc=0.734, loss=47.893, backward_time=1.029, grad_norm=130.607, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.495e-05, train_time=2.717 +[gpub002:0/64] 2023-07-13 06:52:31,943 (trainer:732) INFO: 43epoch:train:4001-4100batch: iter_time=1.034e-04, forward_time=0.147, loss_ctc=65.865, loss_att=48.470, acc=0.723, loss=53.688, backward_time=1.030, grad_norm=141.896, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.495e-05, train_time=2.734 +[gpub002:0/64] 2023-07-13 06:54:15,645 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub002:0/64] 2023-07-13 06:54:33,672 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 06:54:37,089 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 06:54:37,089 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub002:0/64] 2023-07-13 06:54:37,095 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 06:58:54,596 (trainer:732) INFO: 43epoch:train:4101-4200batch: iter_time=1.344, forward_time=0.226, loss_ctc=68.341, loss_att=49.394, acc=0.720, loss=55.078, backward_time=1.069, grad_norm=135.878, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.186, optim0_lr0=5.494e-05, train_time=7.653 +[gpub002:0/64] 2023-07-13 07:01:12,004 (trainer:732) INFO: 43epoch:train:4201-4300batch: iter_time=1.012e-04, forward_time=0.148, loss_ctc=68.518, loss_att=52.143, acc=0.714, loss=57.055, backward_time=1.031, grad_norm=164.714, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.493e-05, train_time=2.748 +[gpub002:0/64] 2023-07-13 07:03:28,068 (trainer:732) INFO: 43epoch:train:4301-4400batch: iter_time=1.019e-04, forward_time=0.146, loss_ctc=78.283, loss_att=56.097, acc=0.717, loss=62.753, backward_time=1.031, grad_norm=131.835, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.493e-05, train_time=2.721 +[gpub002:0/64] 2023-07-13 07:05:44,046 (trainer:732) INFO: 43epoch:train:4401-4500batch: iter_time=1.003e-04, forward_time=0.146, loss_ctc=61.875, loss_att=43.498, acc=0.722, loss=49.011, backward_time=1.030, grad_norm=101.796, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.492e-05, train_time=2.719 +[gpub002:0/64] 2023-07-13 07:08:00,105 (trainer:732) INFO: 43epoch:train:4501-4600batch: iter_time=9.596e-05, forward_time=0.145, loss_ctc=80.642, loss_att=61.038, acc=0.717, loss=66.919, backward_time=1.029, grad_norm=116.023, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.491e-05, 
train_time=2.721 +[gpub002:0/64] 2023-07-13 07:10:39,483 (trainer:732) INFO: 43epoch:train:4601-4700batch: iter_time=1.048e-04, forward_time=0.146, loss_ctc=62.748, loss_att=49.774, acc=0.715, loss=53.666, backward_time=1.071, grad_norm=110.121, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.491e-05, train_time=3.187 +[gpub002:0/64] 2023-07-13 07:12:58,455 (trainer:732) INFO: 43epoch:train:4701-4800batch: iter_time=1.119e-04, forward_time=0.146, loss_ctc=69.387, loss_att=46.901, acc=0.713, loss=53.646, backward_time=1.043, grad_norm=122.959, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.490e-05, train_time=2.779 +[gpub002:0/64] 2023-07-13 07:15:14,699 (trainer:732) INFO: 43epoch:train:4801-4900batch: iter_time=1.047e-04, forward_time=0.146, loss_ctc=63.195, loss_att=42.368, acc=0.732, loss=48.616, backward_time=1.029, grad_norm=110.403, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.489e-05, train_time=2.725 +[gpub002:0/64] 2023-07-13 07:16:46,779 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub002:0/64] 2023-07-13 07:17:31,636 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub002:0/64] 2023-07-13 07:17:49,916 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 07:17:53,361 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 07:17:53,361 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub002:0/64] 2023-07-13 07:17:53,368 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 07:21:12,580 (trainer:732) INFO: 43epoch:train:4901-5000batch: iter_time=1.363, forward_time=0.145, loss_ctc=69.506, loss_att=50.192, acc=0.737, loss=55.987, backward_time=1.033, grad_norm=111.488, clip=100.000, loss_scale=5.431e+32, optim_step_time=0.182, optim0_lr0=5.489e-05, train_time=7.157 +[gpub002:0/64] 2023-07-13 07:23:30,148 (trainer:732) INFO: 43epoch:train:5001-5100batch: iter_time=1.400e-04, forward_time=0.148, loss_ctc=65.881, loss_att=50.693, acc=0.708, loss=55.250, backward_time=1.037, grad_norm=122.810, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.488e-05, train_time=2.751 +[gpub002:0/64] 2023-07-13 07:25:46,716 (trainer:732) INFO: 43epoch:train:5101-5200batch: iter_time=1.426e-04, forward_time=0.147, loss_ctc=67.325, loss_att=50.408, acc=0.713, loss=55.483, backward_time=1.029, grad_norm=122.888, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.487e-05, train_time=2.731 +[gpub002:0/64] 2023-07-13 07:28:02,507 (trainer:732) INFO: 43epoch:train:5201-5300batch: iter_time=1.599e-04, forward_time=0.146, loss_ctc=69.909, loss_att=49.226, acc=0.722, loss=55.431, backward_time=1.029, grad_norm=113.819, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.487e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 07:28:23,967 (trainer:663) WARNING: The grad 
norm is nan. Skipping updating the model. +[gpub002:0/64] 2023-07-13 07:30:18,027 (trainer:732) INFO: 43epoch:train:5301-5400batch: iter_time=1.310e-04, forward_time=0.146, loss_ctc=73.830, loss_att=53.727, acc=0.707, loss=59.758, backward_time=1.028, grad_norm=126.012, clip=100.000, loss_scale=1.854e+32, optim_step_time=0.182, optim0_lr0=5.486e-05, train_time=2.710 +[gpub002:0/64] 2023-07-13 07:32:33,556 (trainer:732) INFO: 43epoch:train:5401-5500batch: iter_time=1.358e-04, forward_time=0.145, loss_ctc=69.457, loss_att=53.006, acc=0.711, loss=57.941, backward_time=1.027, grad_norm=118.602, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.485e-05, train_time=2.710 +[gpub002:0/64] 2023-07-13 07:34:49,617 (trainer:732) INFO: 43epoch:train:5501-5600batch: iter_time=1.294e-04, forward_time=0.147, loss_ctc=70.674, loss_att=52.567, acc=0.708, loss=57.999, backward_time=1.029, grad_norm=124.421, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.485e-05, train_time=2.721 +[gpub002:0/64] 2023-07-13 07:37:05,125 (trainer:732) INFO: 43epoch:train:5601-5700batch: iter_time=1.538e-04, forward_time=0.146, loss_ctc=64.632, loss_att=42.627, acc=0.728, loss=49.228, backward_time=1.026, grad_norm=117.128, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.484e-05, train_time=2.710 +[gpub002:0/64] 2023-07-13 07:39:20,847 (trainer:732) INFO: 43epoch:train:5701-5800batch: iter_time=1.408e-04, forward_time=0.147, loss_ctc=65.046, loss_att=47.284, acc=0.726, loss=52.613, backward_time=1.028, grad_norm=118.675, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.483e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 07:40:20,191 (multiple_iter_factory:32) INFO: Building 7th iter-factory... 
+[gpub002:0/64] 2023-07-13 07:40:38,345 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 07:40:41,837 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 07:40:41,837 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub002:0/64] 2023-07-13 07:40:41,843 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 07:46:21,327 (trainer:732) INFO: 43epoch:train:5801-5900batch: iter_time=2.635, forward_time=0.147, loss_ctc=69.559, loss_att=55.942, acc=0.705, loss=60.027, backward_time=1.049, grad_norm=111.034, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.483e-05, train_time=8.409 +[gpub002:0/64] 2023-07-13 07:48:37,870 (trainer:732) INFO: 43epoch:train:5901-6000batch: iter_time=1.176e-04, forward_time=0.145, loss_ctc=71.663, loss_att=52.278, acc=0.708, loss=58.094, backward_time=1.029, grad_norm=167.485, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.482e-05, train_time=2.731 +[gpub002:0/64] 2023-07-13 07:50:53,784 (trainer:732) INFO: 43epoch:train:6001-6100batch: iter_time=1.131e-04, forward_time=0.146, loss_ctc=65.614, loss_att=46.575, acc=0.717, loss=52.287, backward_time=1.029, grad_norm=119.203, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.481e-05, train_time=2.718 +[gpub002:0/64] 2023-07-13 07:53:27,460 (trainer:732) INFO: 43epoch:train:6101-6200batch: iter_time=1.129e-04, forward_time=0.144, loss_ctc=75.060, loss_att=54.210, acc=0.707, loss=60.465, backward_time=1.046, grad_norm=113.546, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.481e-05, train_time=3.073 +[gpub002:0/64] 2023-07-13 07:55:51,083 (trainer:732) INFO: 43epoch:train:6201-6300batch: iter_time=3.027e-04, forward_time=0.179, loss_ctc=66.883, loss_att=50.860, acc=0.711, loss=55.667, backward_time=1.034, grad_norm=113.637, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.480e-05, train_time=2.872 +[gpub002:0/64] 2023-07-13 07:58:09,272 (trainer:732) INFO: 43epoch:train:6301-6400batch: iter_time=1.044e-04, forward_time=0.163, loss_ctc=73.490, loss_att=54.979, acc=0.704, loss=60.532, backward_time=1.030, grad_norm=134.206, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.479e-05, train_time=2.764 +[gpub002:0/64] 2023-07-13 08:00:24,736 (trainer:732) INFO: 43epoch:train:6401-6500batch: iter_time=1.026e-04, forward_time=0.144, loss_ctc=65.033, loss_att=41.165, acc=0.730, loss=48.326, backward_time=1.025, grad_norm=125.179, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.479e-05, train_time=2.709 +[gpub002:0/64] 2023-07-13 08:02:40,579 (trainer:732) INFO: 43epoch:train:6501-6600batch: iter_time=1.147e-04, forward_time=0.144, loss_ctc=67.103, loss_att=47.658, acc=0.719, loss=53.491, backward_time=1.028, grad_norm=121.695, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, 
optim0_lr0=5.478e-05, train_time=2.717 +[gpub002:0/64] 2023-07-13 08:04:11,900 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub002:0/64] 2023-07-13 08:04:30,016 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 08:04:33,389 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 08:04:33,389 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub002:0/64] 2023-07-13 08:04:33,395 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 08:09:38,546 (trainer:732) INFO: 43epoch:train:6601-6700batch: iter_time=1.336, forward_time=0.186, loss_ctc=69.082, loss_att=51.109, acc=0.714, loss=56.501, backward_time=1.038, grad_norm=108.294, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.185, optim0_lr0=5.477e-05, train_time=8.359 +[gpub002:0/64] 2023-07-13 08:10:34,386 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub002:0/64] 2023-07-13 08:12:01,808 (trainer:732) INFO: 43epoch:train:6701-6800batch: iter_time=1.327e-04, forward_time=0.145, loss_ctc=67.704, loss_att=51.773, acc=0.711, loss=56.552, backward_time=1.037, grad_norm=130.635, clip=100.000, loss_scale=1.109e+32, optim_step_time=0.182, optim0_lr0=5.477e-05, train_time=2.865 +[gpub002:0/64] 2023-07-13 08:14:23,253 (trainer:732) INFO: 43epoch:train:6801-6900batch: iter_time=1.293e-04, forward_time=0.145, loss_ctc=77.182, loss_att=56.548, acc=0.705, loss=62.738, backward_time=1.042, grad_norm=118.840, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.476e-05, train_time=2.829 +[gpub002:0/64] 2023-07-13 08:16:48,767 (trainer:732) INFO: 43epoch:train:6901-7000batch: iter_time=1.451e-04, forward_time=0.145, loss_ctc=60.933, loss_att=43.482, acc=0.718, loss=48.717, backward_time=1.039, grad_norm=142.200, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.476e-05, train_time=2.910 +[gpub002:0/64] 2023-07-13 08:19:18,077 (trainer:732) INFO: 43epoch:train:7001-7100batch: iter_time=1.466e-04, forward_time=0.146, loss_ctc=80.225, loss_att=60.938, acc=0.714, loss=66.724, backward_time=1.043, grad_norm=128.693, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.475e-05, train_time=2.986 +[gpub002:0/64] 2023-07-13 08:21:41,636 (trainer:732) INFO: 43epoch:train:7101-7200batch: iter_time=1.517e-04, forward_time=0.146, loss_ctc=62.221, loss_att=49.382, acc=0.708, loss=53.234, backward_time=1.042, grad_norm=137.390, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.474e-05, train_time=2.871 +[gpub002:0/64] 2023-07-13 08:24:09,625 (trainer:732) INFO: 43epoch:train:7201-7300batch: iter_time=1.485e-04, forward_time=0.145, loss_ctc=69.377, loss_att=47.292, acc=0.709, loss=53.917, backward_time=1.058, grad_norm=123.920, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.474e-05, train_time=2.960 +[gpub002:0/64] 2023-07-13 08:26:25,316 
(trainer:732) INFO: 43epoch:train:7301-7400batch: iter_time=1.355e-04, forward_time=0.146, loss_ctc=62.734, loss_att=42.130, acc=0.731, loss=48.311, backward_time=1.028, grad_norm=112.463, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.473e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 08:28:42,136 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpub002:0/64] 2023-07-13 08:29:00,185 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 08:29:03,836 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 08:29:03,836 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub002:0/64] 2023-07-13 08:29:03,842 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 08:31:55,060 (trainer:732) INFO: 43epoch:train:7401-7500batch: iter_time=1.410, forward_time=0.173, loss_ctc=70.908, loss_att=52.086, acc=0.730, loss=57.732, backward_time=1.031, grad_norm=103.567, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.472e-05, train_time=6.595 +[gpub002:0/64] 2023-07-13 08:34:14,214 (trainer:732) INFO: 43epoch:train:7501-7600batch: iter_time=1.180e-04, forward_time=0.147, loss_ctc=66.889, loss_att=51.014, acc=0.718, loss=55.777, backward_time=1.039, grad_norm=122.067, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.472e-05, train_time=2.783 +[gpub002:0/64] 2023-07-13 08:36:31,213 (trainer:732) INFO: 43epoch:train:7601-7700batch: iter_time=1.128e-04, forward_time=0.146, loss_ctc=76.393, loss_att=55.664, acc=0.714, loss=61.882, backward_time=1.029, grad_norm=138.860, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.471e-05, train_time=2.740 +[gpub002:0/64] 2023-07-13 08:38:47,398 (trainer:732) INFO: 43epoch:train:7701-7800batch: iter_time=1.169e-04, forward_time=0.146, loss_ctc=62.356, loss_att=44.020, acc=0.725, loss=49.521, backward_time=1.030, grad_norm=118.319, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.470e-05, train_time=2.723 +[gpub002:0/64] 2023-07-13 08:41:05,554 (trainer:732) INFO: 43epoch:train:7801-7900batch: iter_time=1.119e-04, forward_time=0.146, loss_ctc=79.362, loss_att=60.799, acc=0.719, loss=66.368, backward_time=1.031, grad_norm=129.499, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.470e-05, train_time=2.763 +[gpub002:0/64] 2023-07-13 08:43:32,372 (trainer:732) INFO: 43epoch:train:7901-8000batch: iter_time=1.138e-04, forward_time=0.145, loss_ctc=62.046, loss_att=49.851, acc=0.708, loss=53.509, backward_time=1.046, grad_norm=124.743, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.469e-05, train_time=2.936 +[gpub002:0/64] 2023-07-13 08:45:48,570 (trainer:732) INFO: 43epoch:train:8001-8100batch: iter_time=1.225e-04, forward_time=0.144, loss_ctc=67.225, loss_att=44.208, acc=0.725, loss=51.113, backward_time=1.027, grad_norm=118.012, 
clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.468e-05, train_time=2.724 +[gpub002:0/64] 2023-07-13 08:48:07,719 (trainer:732) INFO: 43epoch:train:8101-8200batch: iter_time=1.209e-04, forward_time=0.146, loss_ctc=64.666, loss_att=43.302, acc=0.727, loss=49.711, backward_time=1.038, grad_norm=118.883, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.468e-05, train_time=2.783 +[gpub002:0/64] 2023-07-13 08:50:32,893 (trainer:732) INFO: 43epoch:train:8201-8300batch: iter_time=1.252e-04, forward_time=0.145, loss_ctc=70.428, loss_att=52.218, acc=0.734, loss=57.681, backward_time=1.049, grad_norm=109.704, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.467e-05, train_time=2.903 +[gpub002:0/64] 2023-07-13 08:51:36,412 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub002:0/64] 2023-07-13 08:51:54,619 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 08:51:58,008 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 08:51:58,008 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub002:0/64] 2023-07-13 08:51:58,097 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 08:58:16,119 (trainer:732) INFO: 43epoch:train:8301-8400batch: iter_time=3.044, forward_time=0.205, loss_ctc=68.169, loss_att=54.539, acc=0.704, loss=58.628, backward_time=1.054, grad_norm=109.947, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.186, optim0_lr0=5.466e-05, train_time=9.264 +[gpub002:0/64] 2023-07-13 09:00:33,779 (trainer:732) INFO: 43epoch:train:8401-8500batch: iter_time=1.245e-04, forward_time=0.145, loss_ctc=71.006, loss_att=51.275, acc=0.715, loss=57.194, backward_time=1.029, grad_norm=111.833, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.466e-05, train_time=2.753 +[gpub002:0/64] 2023-07-13 09:02:50,313 (trainer:732) INFO: 43epoch:train:8501-8600batch: iter_time=1.221e-04, forward_time=0.146, loss_ctc=65.216, loss_att=46.304, acc=0.720, loss=51.978, backward_time=1.031, grad_norm=114.151, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.465e-05, train_time=2.730 +[gpub002:0/64] 2023-07-13 09:05:11,317 (trainer:732) INFO: 43epoch:train:8601-8700batch: iter_time=1.264e-04, forward_time=0.144, loss_ctc=75.105, loss_att=54.233, acc=0.710, loss=60.494, backward_time=1.034, grad_norm=141.900, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.464e-05, train_time=2.820 +[gpub002:0/64] 2023-07-13 09:07:26,944 (trainer:732) INFO: 43epoch:train:8701-8800batch: iter_time=1.226e-04, forward_time=0.144, loss_ctc=65.886, loss_att=50.674, acc=0.710, loss=55.238, backward_time=1.027, grad_norm=114.949, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.464e-05, train_time=2.712 +[gpub002:0/64] 2023-07-13 09:09:54,294 (trainer:732) INFO: 43epoch:train:8801-8900batch: 
iter_time=1.217e-04, forward_time=0.145, loss_ctc=72.328, loss_att=54.323, acc=0.708, loss=59.725, backward_time=1.040, grad_norm=129.091, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.463e-05, train_time=2.947 +[gpub002:0/64] 2023-07-13 09:12:09,955 (trainer:732) INFO: 43epoch:train:8901-9000batch: iter_time=1.209e-04, forward_time=0.144, loss_ctc=62.790, loss_att=40.413, acc=0.733, loss=47.126, backward_time=1.026, grad_norm=123.009, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.462e-05, train_time=2.713 +[gpub002:0/64] 2023-07-13 09:14:30,267 (trainer:732) INFO: 43epoch:train:9001-9100batch: iter_time=1.184e-04, forward_time=0.144, loss_ctc=65.546, loss_att=47.434, acc=0.723, loss=52.868, backward_time=1.030, grad_norm=126.202, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.462e-05, train_time=2.806 +[gpub002:0/64] 2023-07-13 09:16:03,133 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub002:0/64] 2023-07-13 09:16:21,262 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 09:16:24,730 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 09:16:24,730 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub002:0/64] 2023-07-13 09:16:24,737 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 09:19:55,353 (trainer:732) INFO: 43epoch:train:9101-9200batch: iter_time=1.299, forward_time=0.166, loss_ctc=68.022, loss_att=50.137, acc=0.719, loss=55.502, backward_time=1.039, grad_norm=114.933, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.461e-05, train_time=6.502 +[gpub002:0/64] 2023-07-13 09:22:18,425 (trainer:732) INFO: 43epoch:train:9201-9300batch: iter_time=0.002, forward_time=0.182, loss_ctc=67.757, loss_att=51.479, acc=0.719, loss=56.362, backward_time=1.044, grad_norm=114.798, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.184, optim0_lr0=5.460e-05, train_time=2.861 +[gpub002:0/64] 2023-07-13 09:24:35,968 (trainer:732) INFO: 43epoch:train:9301-9400batch: iter_time=1.123e-04, forward_time=0.148, loss_ctc=75.353, loss_att=56.327, acc=0.716, loss=62.035, backward_time=1.030, grad_norm=143.157, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.460e-05, train_time=2.751 +[gpub002:0/64] 2023-07-13 09:26:52,066 (trainer:732) INFO: 43epoch:train:9401-9500batch: iter_time=1.172e-04, forward_time=0.145, loss_ctc=63.167, loss_att=43.706, acc=0.722, loss=49.544, backward_time=1.028, grad_norm=124.127, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.459e-05, train_time=2.722 +[gpub002:0/64] 2023-07-13 09:29:08,787 (trainer:732) INFO: 43epoch:train:9501-9600batch: iter_time=1.175e-04, forward_time=0.145, loss_ctc=78.082, loss_att=59.968, acc=0.719, loss=65.402, backward_time=1.029, grad_norm=139.813, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, 
optim0_lr0=5.459e-05, train_time=2.734 +[gpub002:0/64] 2023-07-13 09:31:24,647 (trainer:732) INFO: 43epoch:train:9601-9700batch: iter_time=1.180e-04, forward_time=0.145, loss_ctc=62.075, loss_att=49.317, acc=0.717, loss=53.145, backward_time=1.028, grad_norm=129.904, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.458e-05, train_time=2.717 +[gpub002:0/64] 2023-07-13 09:33:40,464 (trainer:732) INFO: 43epoch:train:9701-9800batch: iter_time=1.205e-04, forward_time=0.145, loss_ctc=67.821, loss_att=46.388, acc=0.718, loss=52.818, backward_time=1.029, grad_norm=116.293, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.457e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 09:35:56,258 (trainer:732) INFO: 43epoch:train:9801-9900batch: iter_time=1.285e-04, forward_time=0.146, loss_ctc=62.427, loss_att=40.698, acc=0.739, loss=47.217, backward_time=1.028, grad_norm=116.885, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.457e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 09:38:11,996 (trainer:732) INFO: 43epoch:train:9901-10000batch: iter_time=1.175e-04, forward_time=0.146, loss_ctc=69.128, loss_att=50.402, acc=0.737, loss=56.020, backward_time=1.028, grad_norm=113.548, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.456e-05, train_time=2.715 +[gpub002:0/64] 2023-07-13 09:51:01,601 (trainer:338) INFO: 43epoch results: [train] iter_time=0.220, forward_time=0.151, loss_ctc=69.174, loss_att=50.360, acc=0.716, loss=56.005, backward_time=1.034, grad_norm=122.264, clip=100.000, loss_scale=2.614e+32, optim_step_time=0.182, optim0_lr0=5.488e-05, train_time=3.443, time=4 hours, 47 minutes and 9.28 seconds, total_count=400000, gpu_max_cached_mem_GB=37.574, [valid] loss_ctc=44.035, cer_ctc=0.257, loss_att=36.329, acc=0.704, cer=0.323, wer=0.986, loss=38.641, time=6 minutes and 40.54 seconds, total_count=40986, gpu_max_cached_mem_GB=37.574, [att_plot] time=5 minutes and 57.64 seconds, total_count=0, gpu_max_cached_mem_GB=37.574 +[gpub002:0/64] 2023-07-13 09:51:20,539 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub002:0/64] 2023-07-13 09:51:20,601 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/23epoch.pth +[gpub002:0/64] 2023-07-13 09:51:20,630 (trainer:272) INFO: 44/50epoch started. Estimated time to finish: 1 day, 11 hours and 55 minutes +[gpub002:0/64] 2023-07-13 09:51:21,201 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub002:0/64] 2023-07-13 09:51:40,196 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 09:51:43,685 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 09:51:43,685 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub002:0/64] 2023-07-13 09:51:43,691 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 09:59:49,160 (trainer:732) INFO: 44epoch:train:1-100batch: iter_time=3.652, forward_time=0.172, loss_ctc=71.282, loss_att=54.434, acc=0.717, loss=59.488, backward_time=1.044, grad_norm=125.239, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.455e-05, train_time=10.160 +[gpub002:0/64] 2023-07-13 10:02:06,233 (trainer:732) INFO: 44epoch:train:101-200batch: iter_time=1.160e-04, forward_time=0.145, loss_ctc=74.775, loss_att=56.050, acc=0.700, loss=61.668, backward_time=1.031, grad_norm=122.349, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.455e-05, train_time=2.742 +[gpub002:0/64] 2023-07-13 10:04:22,775 (trainer:732) INFO: 44epoch:train:201-300batch: iter_time=1.130e-04, forward_time=0.146, loss_ctc=87.979, loss_att=66.421, acc=0.708, loss=72.889, backward_time=1.032, grad_norm=117.444, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.454e-05, train_time=2.731 +[gpub002:0/64] 2023-07-13 10:06:38,707 (trainer:732) INFO: 44epoch:train:301-400batch: iter_time=1.104e-04, forward_time=0.144, loss_ctc=70.492, loss_att=49.371, acc=0.696, loss=55.707, backward_time=1.027, grad_norm=120.741, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.453e-05, train_time=2.718 +[gpub002:0/64] 2023-07-13 10:08:58,696 (trainer:732) INFO: 44epoch:train:401-500batch: iter_time=1.123e-04, forward_time=0.144, loss_ctc=76.164, loss_att=60.005, acc=0.695, loss=64.852, backward_time=1.030, grad_norm=120.304, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.453e-05, train_time=2.800 +[gpub002:0/64] 2023-07-13 10:11:18,222 (trainer:732) INFO: 44epoch:train:501-600batch: iter_time=1.088e-04, forward_time=0.144, loss_ctc=86.092, loss_att=56.917, acc=0.713, loss=65.670, backward_time=1.031, grad_norm=137.885, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.452e-05, train_time=2.790 +[gpub002:0/64] 2023-07-13 10:13:52,188 (trainer:732) INFO: 44epoch:train:601-700batch: iter_time=1.207e-04, forward_time=0.145, loss_ctc=72.733, loss_att=54.095, acc=0.720, loss=59.686, backward_time=1.042, grad_norm=123.779, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.451e-05, train_time=3.079 +[gpub002:0/64] 2023-07-13 10:16:22,649 (trainer:732) INFO: 44epoch:train:701-800batch: iter_time=1.176e-04, forward_time=0.145, loss_ctc=76.359, loss_att=63.914, acc=0.695, loss=67.647, backward_time=1.055, grad_norm=120.020, clip=100.000, loss_scale=1.314e+32, optim_step_time=0.182, optim0_lr0=5.451e-05, 
train_time=3.009 +[gpub002:0/64] 2023-07-13 10:17:24,426 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub002:0/64] 2023-07-13 10:17:42,352 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 10:17:45,723 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 10:17:45,723 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub002:0/64] 2023-07-13 10:17:45,730 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 10:24:20,092 (trainer:732) INFO: 44epoch:train:801-900batch: iter_time=3.299, forward_time=0.177, loss_ctc=75.207, loss_att=54.676, acc=0.718, loss=60.835, backward_time=1.047, grad_norm=125.033, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=5.450e-05, train_time=9.548 +[gpub002:0/64] 2023-07-13 10:26:36,519 (trainer:732) INFO: 44epoch:train:901-1000batch: iter_time=1.216e-04, forward_time=0.145, loss_ctc=74.822, loss_att=57.463, acc=0.693, loss=62.670, backward_time=1.028, grad_norm=130.012, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.449e-05, train_time=2.729 +[gpub002:0/64] 2023-07-13 10:28:53,173 (trainer:732) INFO: 44epoch:train:1001-1100batch: iter_time=1.127e-04, forward_time=0.147, loss_ctc=83.374, loss_att=63.558, acc=0.698, loss=69.502, backward_time=1.030, grad_norm=144.799, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.449e-05, train_time=2.733 +[gpub002:0/64] 2023-07-13 10:31:09,555 (trainer:732) INFO: 44epoch:train:1101-1200batch: iter_time=1.185e-04, forward_time=0.145, loss_ctc=77.286, loss_att=52.646, acc=0.715, loss=60.038, backward_time=1.031, grad_norm=129.265, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.448e-05, train_time=2.727 +[gpub002:0/64] 2023-07-13 10:33:25,148 (trainer:732) INFO: 44epoch:train:1201-1300batch: iter_time=1.248e-04, forward_time=0.143, loss_ctc=71.158, loss_att=53.092, acc=0.697, loss=58.512, backward_time=1.026, grad_norm=97.542, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.447e-05, train_time=2.712 +[gpub002:0/64] 2023-07-13 10:35:40,885 (trainer:732) INFO: 44epoch:train:1301-1400batch: iter_time=1.248e-04, forward_time=0.145, loss_ctc=79.055, loss_att=55.984, acc=0.699, loss=62.905, backward_time=1.028, grad_norm=113.589, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.447e-05, train_time=2.715 +[gpub002:0/64] 2023-07-13 10:37:56,583 (trainer:732) INFO: 44epoch:train:1401-1500batch: iter_time=1.192e-04, forward_time=0.145, loss_ctc=76.489, loss_att=55.112, acc=0.717, loss=61.525, backward_time=1.027, grad_norm=124.920, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.446e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 10:40:12,676 (trainer:732) INFO: 44epoch:train:1501-1600batch: iter_time=1.402e-04, forward_time=0.145, loss_ctc=71.799, loss_att=59.313, acc=0.689, loss=63.059, 
backward_time=1.031, grad_norm=120.829, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.446e-05, train_time=2.722 +[gpub002:0/64] 2023-07-13 10:41:44,379 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub002:0/64] 2023-07-13 10:42:02,839 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 10:42:06,277 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 10:42:06,277 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub002:0/64] 2023-07-13 10:42:06,283 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 10:45:52,981 (trainer:732) INFO: 44epoch:train:1601-1700batch: iter_time=1.287, forward_time=0.144, loss_ctc=80.146, loss_att=60.139, acc=0.710, loss=66.141, backward_time=1.038, grad_norm=125.126, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.445e-05, train_time=6.806 +[gpub002:0/64] 2023-07-13 10:48:09,910 (trainer:732) INFO: 44epoch:train:1701-1800batch: iter_time=1.102e-04, forward_time=0.146, loss_ctc=70.303, loss_att=54.003, acc=0.711, loss=58.893, backward_time=1.033, grad_norm=114.713, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.444e-05, train_time=2.738 +[gpub002:0/64] 2023-07-13 10:50:25,669 (trainer:732) INFO: 44epoch:train:1801-1900batch: iter_time=1.468e-04, forward_time=0.146, loss_ctc=76.075, loss_att=61.114, acc=0.693, loss=65.602, backward_time=1.028, grad_norm=138.441, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.444e-05, train_time=2.715 +[gpub002:0/64] 2023-07-13 10:52:41,672 (trainer:732) INFO: 44epoch:train:1901-2000batch: iter_time=1.075e-04, forward_time=0.144, loss_ctc=83.849, loss_att=56.957, acc=0.718, loss=65.025, backward_time=1.029, grad_norm=117.446, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.443e-05, train_time=2.720 +[gpub002:0/64] 2023-07-13 10:54:57,290 (trainer:732) INFO: 44epoch:train:2001-2100batch: iter_time=1.074e-04, forward_time=0.145, loss_ctc=67.877, loss_att=48.882, acc=0.702, loss=54.581, backward_time=1.026, grad_norm=113.885, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.442e-05, train_time=2.712 +[gpub002:0/64] 2023-07-13 10:57:13,426 (trainer:732) INFO: 44epoch:train:2101-2200batch: iter_time=9.840e-05, forward_time=0.144, loss_ctc=81.071, loss_att=59.103, acc=0.698, loss=65.693, backward_time=1.029, grad_norm=131.853, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.442e-05, train_time=2.723 +[gpub002:0/64] 2023-07-13 10:59:32,038 (trainer:732) INFO: 44epoch:train:2201-2300batch: iter_time=1.123e-04, forward_time=0.145, loss_ctc=74.573, loss_att=53.223, acc=0.709, loss=59.628, backward_time=1.030, grad_norm=119.358, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.441e-05, train_time=2.772 +[gpub002:0/64] 2023-07-13 11:01:50,330 (trainer:732) INFO: 
44epoch:train:2301-2400batch: iter_time=1.018e-04, forward_time=0.145, loss_ctc=75.625, loss_att=60.504, acc=0.695, loss=65.040, backward_time=1.041, grad_norm=128.773, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.440e-05, train_time=2.766 +[gpub002:0/64] 2023-07-13 11:04:07,084 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub002:0/64] 2023-07-13 11:04:25,422 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 11:04:28,900 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 11:04:28,900 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub002:0/64] 2023-07-13 11:04:28,906 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 11:08:22,215 (trainer:732) INFO: 44epoch:train:2401-2500batch: iter_time=1.295, forward_time=0.144, loss_ctc=78.132, loss_att=62.868, acc=0.704, loss=67.447, backward_time=1.036, grad_norm=137.248, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.440e-05, train_time=7.837 +[gpub002:0/64] 2023-07-13 11:10:40,796 (trainer:732) INFO: 44epoch:train:2501-2600batch: iter_time=1.172e-04, forward_time=0.144, loss_ctc=67.608, loss_att=49.652, acc=0.723, loss=55.039, backward_time=1.036, grad_norm=147.316, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.439e-05, train_time=2.771 +[gpub002:0/64] 2023-07-13 11:12:56,584 (trainer:732) INFO: 44epoch:train:2601-2700batch: iter_time=1.208e-04, forward_time=0.144, loss_ctc=73.380, loss_att=58.680, acc=0.696, loss=63.090, backward_time=1.029, grad_norm=223.909, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.438e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 11:15:12,427 (trainer:732) INFO: 44epoch:train:2701-2800batch: iter_time=1.109e-04, forward_time=0.145, loss_ctc=86.240, loss_att=61.095, acc=0.709, loss=68.639, backward_time=1.027, grad_norm=160.636, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.438e-05, train_time=2.717 +[gpub002:0/64] 2023-07-13 11:17:27,987 (trainer:732) INFO: 44epoch:train:2801-2900batch: iter_time=1.162e-04, forward_time=0.144, loss_ctc=68.542, loss_att=47.662, acc=0.704, loss=53.926, backward_time=1.026, grad_norm=161.052, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.437e-05, train_time=2.711 +[gpub002:0/64] 2023-07-13 11:19:43,703 (trainer:732) INFO: 44epoch:train:2901-3000batch: iter_time=1.479e-04, forward_time=0.145, loss_ctc=74.838, loss_att=58.199, acc=0.693, loss=63.191, backward_time=1.028, grad_norm=127.448, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.437e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 11:21:59,433 (trainer:732) INFO: 44epoch:train:3001-3100batch: iter_time=1.218e-04, forward_time=0.144, loss_ctc=85.103, loss_att=56.031, acc=0.712, loss=64.753, backward_time=1.027, grad_norm=130.375, clip=100.000, 
loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.436e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 11:24:15,402 (trainer:732) INFO: 44epoch:train:3101-3200batch: iter_time=1.117e-04, forward_time=0.145, loss_ctc=70.848, loss_att=54.267, acc=0.711, loss=59.241, backward_time=1.029, grad_norm=107.471, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.435e-05, train_time=2.719 +[gpub002:0/64] 2023-07-13 11:26:31,345 (trainer:732) INFO: 44epoch:train:3201-3300batch: iter_time=1.136e-04, forward_time=0.145, loss_ctc=78.308, loss_att=65.274, acc=0.688, loss=69.184, backward_time=1.030, grad_norm=163.070, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.435e-05, train_time=2.719 +[gpub002:0/64] 2023-07-13 11:27:16,348 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub002:0/64] 2023-07-13 11:27:34,316 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 11:27:37,764 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 11:27:37,764 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub002:0/64] 2023-07-13 11:27:37,771 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 11:34:38,710 (trainer:732) INFO: 44epoch:train:3301-3400batch: iter_time=1.305, forward_time=0.144, loss_ctc=71.977, loss_att=52.228, acc=0.724, loss=58.153, backward_time=1.042, grad_norm=140.230, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.434e-05, train_time=9.747 +[gpub002:0/64] 2023-07-13 11:36:55,659 (trainer:732) INFO: 44epoch:train:3401-3500batch: iter_time=1.283e-04, forward_time=0.145, loss_ctc=73.853, loss_att=55.371, acc=0.710, loss=60.915, backward_time=1.029, grad_norm=146.633, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.433e-05, train_time=2.739 +[gpub002:0/64] 2023-07-13 11:39:16,602 (trainer:732) INFO: 44epoch:train:3501-3600batch: iter_time=1.104e-04, forward_time=0.146, loss_ctc=82.166, loss_att=61.239, acc=0.713, loss=67.517, backward_time=1.055, grad_norm=156.465, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.193, optim0_lr0=5.433e-05, train_time=2.819 +[gpub002:0/64] 2023-07-13 11:41:35,797 (trainer:732) INFO: 44epoch:train:3601-3700batch: iter_time=1.001e-04, forward_time=0.146, loss_ctc=76.586, loss_att=51.918, acc=0.716, loss=59.318, backward_time=1.044, grad_norm=124.601, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.193, optim0_lr0=5.432e-05, train_time=2.784 +[gpub002:0/64] 2023-07-13 11:43:51,423 (trainer:732) INFO: 44epoch:train:3701-3800batch: iter_time=9.900e-05, forward_time=0.145, loss_ctc=69.296, loss_att=53.491, acc=0.701, loss=58.232, backward_time=1.027, grad_norm=124.208, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.431e-05, train_time=2.712 +[gpub002:0/64] 2023-07-13 11:46:07,819 (trainer:732) INFO: 44epoch:train:3801-3900batch: iter_time=1.039e-04, 
forward_time=0.147, loss_ctc=78.553, loss_att=56.210, acc=0.707, loss=62.913, backward_time=1.031, grad_norm=124.973, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.431e-05, train_time=2.728 +[gpub002:0/64] 2023-07-13 11:48:28,053 (trainer:732) INFO: 44epoch:train:3901-4000batch: iter_time=1.387e-04, forward_time=0.148, loss_ctc=74.950, loss_att=55.969, acc=0.726, loss=61.664, backward_time=1.037, grad_norm=137.290, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.430e-05, train_time=2.804 +[gpub002:0/64] 2023-07-13 11:50:48,088 (trainer:732) INFO: 44epoch:train:4001-4100batch: iter_time=1.290e-04, forward_time=0.148, loss_ctc=70.087, loss_att=56.682, acc=0.705, loss=60.703, backward_time=1.035, grad_norm=149.974, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.429e-05, train_time=2.800 +[gpub002:0/64] 2023-07-13 11:52:19,107 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub002:0/64] 2023-07-13 11:52:37,276 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 11:52:40,687 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 11:52:40,687 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub002:0/64] 2023-07-13 11:52:40,693 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 11:59:04,406 (trainer:732) INFO: 44epoch:train:4101-4200batch: iter_time=1.307, forward_time=0.146, loss_ctc=78.980, loss_att=60.456, acc=0.713, loss=66.013, backward_time=1.053, grad_norm=139.801, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.429e-05, train_time=9.926 +[gpub002:0/64] 2023-07-13 12:01:20,686 (trainer:732) INFO: 44epoch:train:4201-4300batch: iter_time=1.136e-04, forward_time=0.146, loss_ctc=70.147, loss_att=53.310, acc=0.720, loss=58.362, backward_time=1.030, grad_norm=116.468, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.428e-05, train_time=2.725 +[gpub002:0/64] 2023-07-13 12:03:37,446 (trainer:732) INFO: 44epoch:train:4301-4400batch: iter_time=1.102e-04, forward_time=0.145, loss_ctc=76.582, loss_att=59.622, acc=0.708, loss=64.710, backward_time=1.032, grad_norm=126.272, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.428e-05, train_time=2.735 +[gpub002:0/64] 2023-07-13 12:05:53,280 (trainer:732) INFO: 44epoch:train:4401-4500batch: iter_time=1.157e-04, forward_time=0.145, loss_ctc=82.415, loss_att=56.209, acc=0.724, loss=64.071, backward_time=1.028, grad_norm=129.616, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.427e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 12:08:08,646 (trainer:732) INFO: 44epoch:train:4501-4600batch: iter_time=1.143e-04, forward_time=0.145, loss_ctc=65.252, loss_att=46.355, acc=0.705, loss=52.024, backward_time=1.024, grad_norm=116.158, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.426e-05, 
train_time=2.707 +[gpub002:0/64] 2023-07-13 12:10:24,749 (trainer:732) INFO: 44epoch:train:4601-4700batch: iter_time=1.158e-04, forward_time=0.145, loss_ctc=80.559, loss_att=58.522, acc=0.706, loss=65.133, backward_time=1.030, grad_norm=160.477, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.426e-05, train_time=2.722 +[gpub002:0/64] 2023-07-13 12:12:40,507 (trainer:732) INFO: 44epoch:train:4701-4800batch: iter_time=1.147e-04, forward_time=0.146, loss_ctc=76.103, loss_att=53.892, acc=0.722, loss=60.555, backward_time=1.027, grad_norm=118.492, clip=100.000, loss_scale=2.629e+32, optim_step_time=0.181, optim0_lr0=5.425e-05, train_time=2.715 +[gpub002:0/64] 2023-07-13 12:14:56,545 (trainer:732) INFO: 44epoch:train:4801-4900batch: iter_time=1.162e-04, forward_time=0.146, loss_ctc=76.099, loss_att=61.644, acc=0.708, loss=65.980, backward_time=1.030, grad_norm=121.373, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.424e-05, train_time=2.721 +[gpub002:0/64] 2023-07-13 12:17:11,370 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub002:0/64] 2023-07-13 12:17:29,722 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 12:17:33,178 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 12:17:33,178 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub002:0/64] 2023-07-13 12:17:33,184 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 12:23:24,781 (trainer:732) INFO: 44epoch:train:4901-5000batch: iter_time=1.333, forward_time=0.145, loss_ctc=75.905, loss_att=57.847, acc=0.711, loss=63.264, backward_time=1.039, grad_norm=126.648, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.424e-05, train_time=10.165 +[gpub002:0/64] 2023-07-13 12:25:45,045 (trainer:732) INFO: 44epoch:train:5001-5100batch: iter_time=1.084e-04, forward_time=0.146, loss_ctc=70.805, loss_att=55.156, acc=0.702, loss=59.850, backward_time=1.041, grad_norm=116.577, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.423e-05, train_time=2.805 +[gpub002:0/64] 2023-07-13 12:28:00,589 (trainer:732) INFO: 44epoch:train:5101-5200batch: iter_time=1.271e-04, forward_time=0.145, loss_ctc=78.881, loss_att=61.680, acc=0.692, loss=66.840, backward_time=1.027, grad_norm=123.318, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.422e-05, train_time=2.711 +[gpub002:0/64] 2023-07-13 12:30:16,542 (trainer:732) INFO: 44epoch:train:5201-5300batch: iter_time=1.268e-04, forward_time=0.147, loss_ctc=80.888, loss_att=54.612, acc=0.726, loss=62.495, backward_time=1.029, grad_norm=126.226, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.422e-05, train_time=2.719 +[gpub002:0/64] 2023-07-13 12:32:32,337 (trainer:732) INFO: 44epoch:train:5301-5400batch: iter_time=1.301e-04, forward_time=0.146, loss_ctc=65.520, loss_att=48.765, acc=0.704, 
loss=53.792, backward_time=1.028, grad_norm=115.361, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.421e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 12:34:59,810 (trainer:732) INFO: 44epoch:train:5401-5500batch: iter_time=1.201e-04, forward_time=0.146, loss_ctc=77.603, loss_att=55.499, acc=0.696, loss=62.130, backward_time=1.038, grad_norm=131.812, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.421e-05, train_time=2.949 +[gpub002:0/64] 2023-07-13 12:37:15,536 (trainer:732) INFO: 44epoch:train:5501-5600batch: iter_time=1.168e-04, forward_time=0.144, loss_ctc=73.151, loss_att=52.695, acc=0.714, loss=58.832, backward_time=1.027, grad_norm=108.368, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.420e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 12:39:31,588 (trainer:732) INFO: 44epoch:train:5601-5700batch: iter_time=1.156e-04, forward_time=0.145, loss_ctc=77.041, loss_att=62.776, acc=0.692, loss=67.056, backward_time=1.030, grad_norm=117.006, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.419e-05, train_time=2.721 +[gpub002:0/64] 2023-07-13 12:41:47,589 (trainer:732) INFO: 44epoch:train:5701-5800batch: iter_time=1.217e-04, forward_time=0.145, loss_ctc=75.964, loss_att=57.575, acc=0.711, loss=63.092, backward_time=1.029, grad_norm=131.182, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.419e-05, train_time=2.720 +[gpub002:0/64] 2023-07-13 12:42:32,777 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub002:0/64] 2023-07-13 12:42:51,004 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 12:42:54,458 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 12:42:54,458 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub002:0/64] 2023-07-13 12:42:54,464 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 12:48:25,155 (trainer:732) INFO: 44epoch:train:5801-5900batch: iter_time=1.344, forward_time=0.188, loss_ctc=69.328, loss_att=52.361, acc=0.722, loss=57.451, backward_time=1.039, grad_norm=118.642, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.418e-05, train_time=7.951 +[gpub002:0/64] 2023-07-13 12:50:41,794 (trainer:732) INFO: 44epoch:train:5901-6000batch: iter_time=1.069e-04, forward_time=0.145, loss_ctc=71.719, loss_att=54.368, acc=0.713, loss=59.573, backward_time=1.028, grad_norm=148.006, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.417e-05, train_time=2.733 +[gpub002:0/64] 2023-07-13 12:52:58,663 (trainer:732) INFO: 44epoch:train:6001-6100batch: iter_time=1.112e-04, forward_time=0.146, loss_ctc=82.110, loss_att=60.893, acc=0.716, loss=67.258, backward_time=1.032, grad_norm=162.632, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.417e-05, train_time=2.737 +[gpub002:0/64] 2023-07-13 12:55:14,739 
(trainer:732) INFO: 44epoch:train:6101-6200batch: iter_time=1.246e-04, forward_time=0.147, loss_ctc=76.069, loss_att=50.970, acc=0.722, loss=58.499, backward_time=1.030, grad_norm=127.718, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.416e-05, train_time=2.721 +[gpub002:0/64] 2023-07-13 12:57:31,152 (trainer:732) INFO: 44epoch:train:6201-6300batch: iter_time=1.203e-04, forward_time=0.147, loss_ctc=68.497, loss_att=51.883, acc=0.704, loss=56.867, backward_time=1.031, grad_norm=116.576, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.415e-05, train_time=2.728 +[gpub002:0/64] 2023-07-13 12:59:47,343 (trainer:732) INFO: 44epoch:train:6301-6400batch: iter_time=1.205e-04, forward_time=0.147, loss_ctc=79.729, loss_att=55.182, acc=0.711, loss=62.546, backward_time=1.030, grad_norm=126.405, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.415e-05, train_time=2.724 +[gpub002:0/64] 2023-07-13 13:02:03,500 (trainer:732) INFO: 44epoch:train:6401-6500batch: iter_time=1.254e-04, forward_time=0.147, loss_ctc=74.743, loss_att=55.259, acc=0.729, loss=61.104, backward_time=1.030, grad_norm=129.649, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.414e-05, train_time=2.723 +[gpub002:0/64] 2023-07-13 13:04:21,478 (trainer:732) INFO: 44epoch:train:6501-6600batch: iter_time=1.249e-04, forward_time=0.147, loss_ctc=72.287, loss_att=56.876, acc=0.707, loss=61.499, backward_time=1.035, grad_norm=121.228, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.414e-05, train_time=2.759 +[gpub002:0/64] 2023-07-13 13:06:07,952 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub002:0/64] 2023-07-13 13:06:26,685 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 13:06:30,127 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 13:06:30,127 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub002:0/64] 2023-07-13 13:06:30,133 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 13:10:55,875 (trainer:732) INFO: 44epoch:train:6601-6700batch: iter_time=2.456, forward_time=0.155, loss_ctc=76.904, loss_att=57.906, acc=0.720, loss=63.606, backward_time=1.050, grad_norm=144.697, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.413e-05, train_time=7.888 +[gpub002:0/64] 2023-07-13 13:13:12,276 (trainer:732) INFO: 44epoch:train:6701-6800batch: iter_time=1.179e-04, forward_time=0.144, loss_ctc=71.936, loss_att=56.557, acc=0.703, loss=61.171, backward_time=1.027, grad_norm=116.798, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.412e-05, train_time=2.728 +[gpub002:0/64] 2023-07-13 13:15:28,808 (trainer:732) INFO: 44epoch:train:6801-6900batch: iter_time=1.220e-04, forward_time=0.145, loss_ctc=80.914, loss_att=61.465, acc=0.700, loss=67.299, backward_time=1.028, grad_norm=148.620, clip=100.000, 
loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.412e-05, train_time=2.730 +[gpub002:0/64] 2023-07-13 13:17:44,586 (trainer:732) INFO: 44epoch:train:6901-7000batch: iter_time=1.244e-04, forward_time=0.145, loss_ctc=77.724, loss_att=54.794, acc=0.706, loss=61.673, backward_time=1.027, grad_norm=112.450, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.411e-05, train_time=2.715 +[gpub002:0/64] 2023-07-13 13:20:00,070 (trainer:732) INFO: 44epoch:train:7001-7100batch: iter_time=1.287e-04, forward_time=0.144, loss_ctc=69.685, loss_att=51.830, acc=0.704, loss=57.186, backward_time=1.026, grad_norm=123.047, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.410e-05, train_time=2.709 +[gpub002:0/64] 2023-07-13 13:22:20,661 (trainer:732) INFO: 44epoch:train:7101-7200batch: iter_time=1.207e-04, forward_time=0.165, loss_ctc=81.406, loss_att=55.579, acc=0.704, loss=63.327, backward_time=1.031, grad_norm=171.513, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.410e-05, train_time=2.812 +[gpub002:0/64] 2023-07-13 13:24:36,675 (trainer:732) INFO: 44epoch:train:7201-7300batch: iter_time=1.127e-04, forward_time=0.145, loss_ctc=73.378, loss_att=57.476, acc=0.708, loss=62.246, backward_time=1.030, grad_norm=116.929, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.409e-05, train_time=2.720 +[gpub002:0/64] 2023-07-13 13:26:53,257 (trainer:732) INFO: 44epoch:train:7301-7400batch: iter_time=1.122e-04, forward_time=0.145, loss_ctc=72.420, loss_att=58.355, acc=0.694, loss=62.575, backward_time=1.029, grad_norm=124.267, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.408e-05, train_time=2.731 +[gpub002:0/64] 2023-07-13 13:29:10,199 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub002:0/64] 2023-07-13 13:29:28,349 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 13:29:31,773 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 13:29:31,773 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub002:0/64] 2023-07-13 13:29:31,780 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 13:32:48,933 (trainer:732) INFO: 44epoch:train:7401-7500batch: iter_time=1.337, forward_time=0.199, loss_ctc=76.054, loss_att=55.067, acc=0.719, loss=61.363, backward_time=1.035, grad_norm=122.867, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.408e-05, train_time=7.113 +[gpub002:0/64] 2023-07-13 13:35:07,231 (trainer:732) INFO: 44epoch:train:7501-7600batch: iter_time=1.428e-04, forward_time=0.147, loss_ctc=72.133, loss_att=54.014, acc=0.717, loss=59.450, backward_time=1.036, grad_norm=138.467, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.407e-05, train_time=2.766 +[gpub002:0/64] 2023-07-13 13:37:23,964 (trainer:732) INFO: 44epoch:train:7601-7700batch: iter_time=9.887e-05, forward_time=0.145, loss_ctc=78.211, loss_att=59.906, acc=0.709, loss=65.397, backward_time=1.030, grad_norm=141.983, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.407e-05, train_time=2.734 +[gpub002:0/64] 2023-07-13 13:39:40,522 (trainer:732) INFO: 44epoch:train:7701-7800batch: iter_time=1.062e-04, forward_time=0.145, loss_ctc=81.364, loss_att=54.416, acc=0.730, loss=62.501, backward_time=1.028, grad_norm=135.340, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.406e-05, train_time=2.731 +[gpub002:0/64] 2023-07-13 13:41:55,986 (trainer:732) INFO: 44epoch:train:7801-7900batch: iter_time=1.134e-04, forward_time=0.144, loss_ctc=64.512, loss_att=47.472, acc=0.708, loss=52.584, backward_time=1.026, grad_norm=123.607, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.405e-05, train_time=2.709 +[gpub002:0/64] 2023-07-13 13:44:11,989 (trainer:732) INFO: 44epoch:train:7901-8000batch: iter_time=1.154e-04, forward_time=0.145, loss_ctc=77.259, loss_att=55.929, acc=0.709, loss=62.328, backward_time=1.029, grad_norm=131.598, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.405e-05, train_time=2.720 +[gpub002:0/64] 2023-07-13 13:46:37,906 (trainer:732) INFO: 44epoch:train:8001-8100batch: iter_time=1.120e-04, forward_time=0.145, loss_ctc=73.351, loss_att=53.304, acc=0.719, loss=59.318, backward_time=1.062, grad_norm=109.641, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.404e-05, train_time=2.918 +[gpub002:0/64] 2023-07-13 13:48:56,865 (trainer:732) INFO: 44epoch:train:8101-8200batch: iter_time=1.157e-04, forward_time=0.145, loss_ctc=74.136, loss_att=61.403, acc=0.701, loss=65.223, backward_time=1.035, grad_norm=118.234, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, 
optim0_lr0=5.403e-05, train_time=2.779 +[gpub002:0/64] 2023-07-13 13:51:17,363 (trainer:732) INFO: 44epoch:train:8201-8300batch: iter_time=1.169e-04, forward_time=0.145, loss_ctc=76.981, loss_att=57.043, acc=0.721, loss=63.025, backward_time=1.034, grad_norm=133.096, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.403e-05, train_time=2.810 +[gpub002:0/64] 2023-07-13 13:52:22,069 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub002:0/64] 2023-07-13 13:52:40,279 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 13:52:43,910 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 13:52:43,910 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub002:0/64] 2023-07-13 13:52:43,916 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 13:59:20,241 (trainer:732) INFO: 44epoch:train:8301-8400batch: iter_time=2.263, forward_time=0.185, loss_ctc=68.303, loss_att=51.973, acc=0.725, loss=56.872, backward_time=1.048, grad_norm=114.275, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.402e-05, train_time=9.657 +[gpub002:0/64] 2023-07-13 14:01:37,614 (trainer:732) INFO: 44epoch:train:8401-8500batch: iter_time=9.752e-05, forward_time=0.144, loss_ctc=71.653, loss_att=56.730, acc=0.711, loss=61.207, backward_time=1.030, grad_norm=117.549, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.402e-05, train_time=2.748 +[gpub002:0/64] 2023-07-13 14:03:54,243 (trainer:732) INFO: 44epoch:train:8501-8600batch: iter_time=1.303e-04, forward_time=0.147, loss_ctc=84.550, loss_att=58.625, acc=0.720, loss=66.402, backward_time=1.031, grad_norm=124.781, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.401e-05, train_time=2.732 +[gpub002:0/64] 2023-07-13 14:06:11,819 (trainer:732) INFO: 44epoch:train:8601-8700batch: iter_time=1.454e-04, forward_time=0.146, loss_ctc=68.495, loss_att=47.376, acc=0.706, loss=53.712, backward_time=1.032, grad_norm=109.505, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.400e-05, train_time=2.751 +[gpub002:0/64] 2023-07-13 14:08:27,943 (trainer:732) INFO: 44epoch:train:8701-8800batch: iter_time=1.376e-04, forward_time=0.146, loss_ctc=74.817, loss_att=58.004, acc=0.704, loss=63.048, backward_time=1.031, grad_norm=116.827, clip=100.000, loss_scale=5.257e+32, optim_step_time=0.181, optim0_lr0=5.400e-05, train_time=2.722 +[gpub002:0/64] 2023-07-13 14:10:46,494 (trainer:732) INFO: 44epoch:train:8801-8900batch: iter_time=1.356e-04, forward_time=0.147, loss_ctc=78.590, loss_att=52.950, acc=0.730, loss=60.642, backward_time=1.032, grad_norm=116.926, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.399e-05, train_time=2.771 +[gpub002:0/64] 2023-07-13 14:13:06,393 (trainer:732) INFO: 44epoch:train:8901-9000batch: iter_time=1.178e-04, forward_time=0.148, loss_ctc=72.075, 
loss_att=54.319, acc=0.717, loss=59.646, backward_time=1.037, grad_norm=123.080, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.398e-05, train_time=2.798 +[gpub002:0/64] 2023-07-13 14:15:26,105 (trainer:732) INFO: 44epoch:train:9001-9100batch: iter_time=1.106e-04, forward_time=0.146, loss_ctc=79.231, loss_att=65.332, acc=0.701, loss=69.502, backward_time=1.047, grad_norm=112.101, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.398e-05, train_time=2.794 +[gpub002:0/64] 2023-07-13 14:17:01,910 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub002:0/64] 2023-07-13 14:17:20,361 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 14:17:24,093 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 14:17:24,093 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub002:0/64] 2023-07-13 14:17:24,100 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 14:23:35,296 (trainer:732) INFO: 44epoch:train:9101-9200batch: iter_time=1.994, forward_time=0.152, loss_ctc=64.897, loss_att=47.438, acc=0.726, loss=52.676, backward_time=1.049, grad_norm=112.693, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.397e-05, train_time=9.784 +[gpub002:0/64] 2023-07-13 14:25:52,081 (trainer:732) INFO: 44epoch:train:9201-9300batch: iter_time=1.231e-04, forward_time=0.145, loss_ctc=69.610, loss_att=54.648, acc=0.710, loss=59.136, backward_time=1.028, grad_norm=111.652, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=5.396e-05, train_time=2.735 +[gpub002:0/64] 2023-07-13 14:28:09,358 (trainer:732) INFO: 44epoch:train:9301-9400batch: iter_time=1.312e-04, forward_time=0.145, loss_ctc=73.827, loss_att=60.050, acc=0.696, loss=64.183, backward_time=1.027, grad_norm=114.849, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.396e-05, train_time=2.745 +[gpub002:0/64] 2023-07-13 14:30:25,352 (trainer:732) INFO: 44epoch:train:9401-9500batch: iter_time=1.301e-04, forward_time=0.145, loss_ctc=84.509, loss_att=55.869, acc=0.724, loss=64.461, backward_time=1.028, grad_norm=134.698, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.395e-05, train_time=2.720 +[gpub002:0/64] 2023-07-13 14:32:41,493 (trainer:732) INFO: 44epoch:train:9501-9600batch: iter_time=1.238e-04, forward_time=0.146, loss_ctc=65.351, loss_att=46.905, acc=0.708, loss=52.439, backward_time=1.027, grad_norm=114.409, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=5.395e-05, train_time=2.723 +[gpub002:0/64] 2023-07-13 14:34:57,531 (trainer:732) INFO: 44epoch:train:9601-9700batch: iter_time=1.233e-04, forward_time=0.146, loss_ctc=79.646, loss_att=58.448, acc=0.702, loss=64.808, backward_time=1.030, grad_norm=123.060, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.394e-05, train_time=2.721 +[gpub002:0/64] 
2023-07-13 14:37:13,895 (trainer:732) INFO: 44epoch:train:9701-9800batch: iter_time=1.296e-04, forward_time=0.146, loss_ctc=74.065, loss_att=51.749, acc=0.719, loss=58.444, backward_time=1.028, grad_norm=130.614, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.393e-05, train_time=2.727 +[gpub002:0/64] 2023-07-13 14:39:29,723 (trainer:732) INFO: 44epoch:train:9801-9900batch: iter_time=1.294e-04, forward_time=0.146, loss_ctc=75.390, loss_att=60.899, acc=0.695, loss=65.246, backward_time=1.028, grad_norm=124.549, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.393e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 14:41:45,627 (trainer:732) INFO: 44epoch:train:9901-10000batch: iter_time=1.350e-04, forward_time=0.147, loss_ctc=75.203, loss_att=57.881, acc=0.713, loss=63.078, backward_time=1.029, grad_norm=127.418, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.392e-05, train_time=2.718 +[gpub002:0/64] 2023-07-13 14:54:13,290 (trainer:338) INFO: 44epoch results: [train] iter_time=0.229, forward_time=0.148, loss_ctc=75.300, loss_att=56.099, acc=0.709, loss=61.860, backward_time=1.033, grad_norm=128.444, clip=100.000, loss_scale=2.826e+32, optim_step_time=0.182, optim0_lr0=5.423e-05, train_time=3.485, time=4 hours, 50 minutes and 36.87 seconds, total_count=410000, gpu_max_cached_mem_GB=37.574, [valid] loss_ctc=43.588, cer_ctc=0.256, loss_att=36.048, acc=0.706, cer=0.321, wer=0.984, loss=38.310, time=6 minutes and 1.21 seconds, total_count=41998, gpu_max_cached_mem_GB=37.574, [att_plot] time=6 minutes and 14.4 seconds, total_count=0, gpu_max_cached_mem_GB=37.574 +[gpub002:0/64] 2023-07-13 14:54:29,899 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub002:0/64] 2023-07-13 14:54:29,920 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/37epoch.pth, exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/39epoch.pth +[gpub002:0/64] 2023-07-13 14:54:29,921 (trainer:272) INFO: 45/50epoch started. Estimated time to finish: 1 day, 6 hours and 41 minutes +[gpub002:0/64] 2023-07-13 14:54:30,027 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub002:0/64] 2023-07-13 14:54:47,960 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 14:54:52,321 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 14:54:52,321 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub002:0/64] 2023-07-13 14:54:52,372 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 15:01:36,666 (trainer:732) INFO: 45epoch:train:1-100batch: iter_time=2.837, forward_time=0.166, loss_ctc=81.665, loss_att=63.090, acc=0.699, loss=68.662, backward_time=1.044, grad_norm=124.602, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.186, optim0_lr0=5.391e-05, train_time=8.533 +[gpub002:0/64] 2023-07-13 15:03:34,215 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub002:0/64] 2023-07-13 15:03:58,918 (trainer:732) INFO: 45epoch:train:101-200batch: iter_time=1.454e-04, forward_time=0.167, loss_ctc=78.164, loss_att=54.166, acc=0.720, loss=61.365, backward_time=1.033, grad_norm=145.604, clip=100.000, loss_scale=5.894e+32, optim_step_time=0.183, optim0_lr0=5.391e-05, train_time=2.845 +[gpub002:0/64] 2023-07-13 15:06:25,567 (trainer:732) INFO: 45epoch:train:201-300batch: iter_time=1.205e-04, forward_time=0.151, loss_ctc=68.261, loss_att=52.387, acc=0.704, loss=57.149, backward_time=1.044, grad_norm=106.301, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.390e-05, train_time=2.933 +[gpub002:0/64] 2023-07-13 15:08:51,283 (trainer:732) INFO: 45epoch:train:301-400batch: iter_time=1.249e-04, forward_time=0.151, loss_ctc=66.533, loss_att=51.447, acc=0.717, loss=55.973, backward_time=1.039, grad_norm=108.528, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.390e-05, train_time=2.914 +[gpub002:0/64] 2023-07-13 15:11:15,915 (trainer:732) INFO: 45epoch:train:401-500batch: iter_time=1.270e-04, forward_time=0.162, loss_ctc=75.336, loss_att=58.055, acc=0.709, loss=63.240, backward_time=1.031, grad_norm=122.397, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.389e-05, train_time=2.892 +[gpub002:0/64] 2023-07-13 15:13:39,370 (trainer:732) INFO: 45epoch:train:501-600batch: iter_time=1.190e-04, forward_time=0.175, loss_ctc=80.003, loss_att=63.160, acc=0.708, loss=68.213, backward_time=1.047, grad_norm=123.534, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.187, optim0_lr0=5.388e-05, train_time=2.868 +[gpub002:0/64] 2023-07-13 15:16:05,037 (trainer:732) INFO: 45epoch:train:601-700batch: iter_time=1.220e-04, forward_time=0.160, loss_ctc=66.333, loss_att=48.550, acc=0.711, loss=53.885, backward_time=1.041, grad_norm=123.517, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.388e-05, train_time=2.914 +[gpub002:0/64] 2023-07-13 15:18:34,274 (trainer:732) INFO: 45epoch:train:701-800batch: iter_time=1.331e-04, forward_time=0.169, loss_ctc=70.528, loss_att=52.616, acc=0.702, loss=57.989, 
backward_time=1.059, grad_norm=106.529, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.189, optim0_lr0=5.387e-05, train_time=2.985 +[gpub002:0/64] 2023-07-13 15:19:38,148 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub002:0/64] 2023-07-13 15:19:56,052 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 15:19:59,706 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 15:19:59,706 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub002:0/64] 2023-07-13 15:19:59,712 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 15:25:05,470 (trainer:732) INFO: 45epoch:train:801-900batch: iter_time=1.925, forward_time=0.196, loss_ctc=70.359, loss_att=50.574, acc=0.717, loss=56.510, backward_time=1.041, grad_norm=156.159, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.189, optim0_lr0=5.386e-05, train_time=7.823 +[gpub002:0/64] 2023-07-13 15:27:22,701 (trainer:732) INFO: 45epoch:train:901-1000batch: iter_time=1.363e-04, forward_time=0.147, loss_ctc=79.920, loss_att=62.286, acc=0.707, loss=67.576, backward_time=1.033, grad_norm=133.899, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.386e-05, train_time=2.745 +[gpub002:0/64] 2023-07-13 15:29:38,869 (trainer:732) INFO: 45epoch:train:1001-1100batch: iter_time=1.411e-04, forward_time=0.147, loss_ctc=73.445, loss_att=53.330, acc=0.704, loss=59.364, backward_time=1.030, grad_norm=132.822, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.385e-05, train_time=2.723 +[gpub002:0/64] 2023-07-13 15:31:56,763 (trainer:732) INFO: 45epoch:train:1101-1200batch: iter_time=4.801e-04, forward_time=0.146, loss_ctc=71.514, loss_att=56.740, acc=0.723, loss=61.172, backward_time=1.034, grad_norm=142.017, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.189, optim0_lr0=5.385e-05, train_time=2.758 +[gpub002:0/64] 2023-07-13 15:34:12,180 (trainer:732) INFO: 45epoch:train:1201-1300batch: iter_time=9.975e-05, forward_time=0.143, loss_ctc=64.208, loss_att=48.306, acc=0.716, loss=53.077, backward_time=1.025, grad_norm=147.537, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.384e-05, train_time=2.708 +[gpub002:0/64] 2023-07-13 15:36:49,716 (trainer:732) INFO: 45epoch:train:1301-1400batch: iter_time=4.440e-04, forward_time=0.316, loss_ctc=76.597, loss_att=58.015, acc=0.710, loss=63.590, backward_time=1.059, grad_norm=118.802, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.189, optim0_lr0=5.383e-05, train_time=3.150 +[gpub002:0/64] 2023-07-13 15:39:05,648 (trainer:732) INFO: 45epoch:train:1401-1500batch: iter_time=1.019e-04, forward_time=0.145, loss_ctc=75.275, loss_att=58.906, acc=0.714, loss=63.817, backward_time=1.028, grad_norm=121.195, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.383e-05, train_time=2.719 +[gpub002:0/64] 2023-07-13 15:41:21,653 (trainer:732) INFO: 
45epoch:train:1501-1600batch: iter_time=1.027e-04, forward_time=0.145, loss_ctc=67.840, loss_att=50.503, acc=0.699, loss=55.704, backward_time=1.028, grad_norm=110.323, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.382e-05, train_time=2.719 +[gpub002:0/64] 2023-07-13 15:43:03,648 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub002:0/64] 2023-07-13 15:43:21,668 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 15:43:25,353 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 15:43:25,353 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub002:0/64] 2023-07-13 15:43:25,360 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 15:47:55,266 (trainer:732) INFO: 45epoch:train:1601-1700batch: iter_time=2.508, forward_time=0.175, loss_ctc=74.468, loss_att=56.404, acc=0.712, loss=61.823, backward_time=1.040, grad_norm=106.781, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.186, optim0_lr0=5.381e-05, train_time=7.873 +[gpub002:0/64] 2023-07-13 15:50:12,061 (trainer:732) INFO: 45epoch:train:1701-1800batch: iter_time=1.176e-04, forward_time=0.148, loss_ctc=71.768, loss_att=53.722, acc=0.718, loss=59.136, backward_time=1.032, grad_norm=124.090, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.381e-05, train_time=2.736 +[gpub002:0/64] 2023-07-13 15:52:28,137 (trainer:732) INFO: 45epoch:train:1801-1900batch: iter_time=1.253e-04, forward_time=0.146, loss_ctc=73.393, loss_att=54.484, acc=0.706, loss=60.157, backward_time=1.028, grad_norm=122.871, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.380e-05, train_time=2.721 +[gpub002:0/64] 2023-07-13 15:54:44,035 (trainer:732) INFO: 45epoch:train:1901-2000batch: iter_time=1.209e-04, forward_time=0.145, loss_ctc=72.218, loss_att=56.883, acc=0.716, loss=61.484, backward_time=1.029, grad_norm=99.995, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.380e-05, train_time=2.718 +[gpub002:0/64] 2023-07-13 15:56:59,596 (trainer:732) INFO: 45epoch:train:2001-2100batch: iter_time=1.169e-04, forward_time=0.145, loss_ctc=64.329, loss_att=47.171, acc=0.728, loss=52.319, backward_time=1.027, grad_norm=106.538, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.379e-05, train_time=2.711 +[gpub002:0/64] 2023-07-13 15:59:15,764 (trainer:732) INFO: 45epoch:train:2101-2200batch: iter_time=1.161e-04, forward_time=0.147, loss_ctc=78.537, loss_att=63.446, acc=0.702, loss=67.973, backward_time=1.030, grad_norm=116.976, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.378e-05, train_time=2.723 +[gpub002:0/64] 2023-07-13 16:01:31,536 (trainer:732) INFO: 45epoch:train:2201-2300batch: iter_time=1.193e-04, forward_time=0.146, loss_ctc=72.395, loss_att=52.310, acc=0.716, loss=58.335, backward_time=1.027, grad_norm=121.900, clip=100.000, 
loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.378e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 16:04:07,432 (trainer:732) INFO: 45epoch:train:2301-2400batch: iter_time=4.133e-04, forward_time=0.291, loss_ctc=64.940, loss_att=49.301, acc=0.698, loss=53.992, backward_time=1.048, grad_norm=119.660, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.195, optim0_lr0=5.377e-05, train_time=3.119 +[gpub002:0/64] 2023-07-13 16:06:26,316 (trainer:732) INFO: 45epoch:train:2401-2500batch: iter_time=1.060e-04, forward_time=0.146, loss_ctc=66.773, loss_att=49.837, acc=0.725, loss=54.918, backward_time=1.033, grad_norm=113.051, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.376e-05, train_time=2.778 +[gpub002:0/64] 2023-07-13 16:06:47,569 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub002:0/64] 2023-07-13 16:07:05,636 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 16:07:09,245 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 16:07:09,246 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub002:0/64] 2023-07-13 16:07:09,252 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 16:13:45,255 (trainer:732) INFO: 45epoch:train:2501-2600batch: iter_time=2.932, forward_time=0.146, loss_ctc=75.245, loss_att=59.285, acc=0.705, loss=64.073, backward_time=1.045, grad_norm=143.838, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.376e-05, train_time=8.779 +[gpub002:0/64] 2023-07-13 16:16:02,493 (trainer:732) INFO: 45epoch:train:2601-2700batch: iter_time=1.125e-04, forward_time=0.145, loss_ctc=71.169, loss_att=50.380, acc=0.721, loss=56.617, backward_time=1.031, grad_norm=126.623, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.375e-05, train_time=2.745 +[gpub002:0/64] 2023-07-13 16:18:18,405 (trainer:732) INFO: 45epoch:train:2701-2800batch: iter_time=1.474e-04, forward_time=0.146, loss_ctc=72.864, loss_att=55.984, acc=0.701, loss=61.048, backward_time=1.030, grad_norm=110.866, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.375e-05, train_time=2.718 +[gpub002:0/64] 2023-07-13 16:20:34,197 (trainer:732) INFO: 45epoch:train:2801-2900batch: iter_time=1.338e-04, forward_time=0.146, loss_ctc=71.354, loss_att=54.781, acc=0.724, loss=59.753, backward_time=1.029, grad_norm=114.172, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.374e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 16:22:50,091 (trainer:732) INFO: 45epoch:train:2901-3000batch: iter_time=1.232e-04, forward_time=0.146, loss_ctc=63.704, loss_att=47.879, acc=0.721, loss=52.626, backward_time=1.031, grad_norm=113.241, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.373e-05, train_time=2.718 +[gpub002:0/64] 2023-07-13 16:25:08,892 (trainer:732) INFO: 45epoch:train:3001-3100batch: iter_time=1.287e-04, 
forward_time=0.146, loss_ctc=80.531, loss_att=66.219, acc=0.702, loss=70.512, backward_time=1.031, grad_norm=129.615, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.373e-05, train_time=2.776 +[gpub002:0/64] 2023-07-13 16:27:26,710 (trainer:732) INFO: 45epoch:train:3101-3200batch: iter_time=1.241e-04, forward_time=0.146, loss_ctc=74.038, loss_att=52.789, acc=0.722, loss=59.164, backward_time=1.033, grad_norm=120.734, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.372e-05, train_time=2.756 +[gpub002:0/64] 2023-07-13 16:29:45,474 (trainer:732) INFO: 45epoch:train:3201-3300batch: iter_time=1.212e-04, forward_time=0.145, loss_ctc=64.774, loss_att=50.047, acc=0.692, loss=54.465, backward_time=1.031, grad_norm=159.741, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.372e-05, train_time=2.775 +[gpub002:0/64] 2023-07-13 16:30:34,615 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub002:0/64] 2023-07-13 16:30:52,965 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 16:30:56,648 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 16:30:56,648 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub002:0/64] 2023-07-13 16:30:56,655 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 16:36:51,770 (trainer:732) INFO: 45epoch:train:3301-3400batch: iter_time=1.357, forward_time=0.191, loss_ctc=74.879, loss_att=59.904, acc=0.707, loss=64.396, backward_time=1.042, grad_norm=130.493, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.371e-05, train_time=8.525 +[gpub002:0/64] 2023-07-13 16:39:07,620 (trainer:732) INFO: 45epoch:train:3401-3500batch: iter_time=1.165e-04, forward_time=0.145, loss_ctc=72.274, loss_att=48.909, acc=0.725, loss=55.919, backward_time=1.029, grad_norm=119.444, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.370e-05, train_time=2.718 +[gpub002:0/64] 2023-07-13 16:41:23,193 (trainer:732) INFO: 45epoch:train:3501-3600batch: iter_time=1.335e-04, forward_time=0.146, loss_ctc=73.254, loss_att=56.523, acc=0.710, loss=61.542, backward_time=1.027, grad_norm=136.299, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.370e-05, train_time=2.711 +[gpub002:0/64] 2023-07-13 16:43:38,990 (trainer:732) INFO: 45epoch:train:3601-3700batch: iter_time=1.194e-04, forward_time=0.146, loss_ctc=68.969, loss_att=55.099, acc=0.704, loss=59.260, backward_time=1.029, grad_norm=118.124, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.369e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 16:45:54,638 (trainer:732) INFO: 45epoch:train:3701-3800batch: iter_time=1.180e-04, forward_time=0.145, loss_ctc=67.300, loss_att=51.250, acc=0.710, loss=56.065, backward_time=1.027, grad_norm=122.728, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, 
optim0_lr0=5.368e-05, train_time=2.713 +[gpub002:0/64] 2023-07-13 16:48:11,353 (trainer:732) INFO: 45epoch:train:3801-3900batch: iter_time=1.176e-04, forward_time=0.147, loss_ctc=79.048, loss_att=65.393, acc=0.693, loss=69.490, backward_time=1.029, grad_norm=138.605, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.368e-05, train_time=2.734 +[gpub002:0/64] 2023-07-13 16:50:30,597 (trainer:732) INFO: 45epoch:train:3901-4000batch: iter_time=1.266e-04, forward_time=0.166, loss_ctc=67.808, loss_att=48.262, acc=0.723, loss=54.126, backward_time=1.032, grad_norm=118.096, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.367e-05, train_time=2.785 +[gpub002:0/64] 2023-07-13 16:52:46,308 (trainer:732) INFO: 45epoch:train:4001-4100batch: iter_time=1.301e-04, forward_time=0.146, loss_ctc=66.270, loss_att=49.916, acc=0.699, loss=54.822, backward_time=1.029, grad_norm=112.093, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.367e-05, train_time=2.714 +[gpub002:0/64] 2023-07-13 16:54:21,064 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub002:0/64] 2023-07-13 16:54:39,212 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 16:54:42,631 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 16:54:42,631 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub002:0/64] 2023-07-13 16:54:42,637 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 17:00:13,505 (trainer:732) INFO: 45epoch:train:4101-4200batch: iter_time=1.377, forward_time=0.185, loss_ctc=69.143, loss_att=51.631, acc=0.718, loss=56.885, backward_time=1.038, grad_norm=123.088, clip=100.000, loss_scale=3.829e+32, optim_step_time=0.184, optim0_lr0=5.366e-05, train_time=8.944 +[gpub002:0/64] 2023-07-13 17:02:31,518 (trainer:732) INFO: 45epoch:train:4201-4300batch: iter_time=1.013e-04, forward_time=0.147, loss_ctc=78.116, loss_att=61.080, acc=0.702, loss=66.191, backward_time=1.036, grad_norm=135.639, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.365e-05, train_time=2.760 +[gpub002:0/64] 2023-07-13 17:04:47,316 (trainer:732) INFO: 45epoch:train:4301-4400batch: iter_time=1.076e-04, forward_time=0.144, loss_ctc=73.863, loss_att=48.660, acc=0.731, loss=56.221, backward_time=1.028, grad_norm=111.515, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.365e-05, train_time=2.716 +[gpub002:0/64] 2023-07-13 17:07:04,029 (trainer:732) INFO: 45epoch:train:4401-4500batch: iter_time=1.058e-04, forward_time=0.146, loss_ctc=69.350, loss_att=55.129, acc=0.715, loss=59.396, backward_time=1.031, grad_norm=110.270, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.364e-05, train_time=2.734 +[gpub002:0/64] 2023-07-13 17:09:30,922 (trainer:732) INFO: 45epoch:train:4501-4600batch: iter_time=1.035e-04, forward_time=0.146, loss_ctc=64.703, 
loss_att=49.097, acc=0.721, loss=53.779, backward_time=1.044, grad_norm=107.915, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.363e-05, train_time=2.938 +[gpub002:0/64] 2023-07-13 17:12:18,786 (trainer:732) INFO: 45epoch:train:4601-4700batch: iter_time=1.141e-04, forward_time=0.145, loss_ctc=73.234, loss_att=56.968, acc=0.715, loss=61.847, backward_time=1.061, grad_norm=124.928, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.363e-05, train_time=3.357 +[gpub002:0/64] 2023-07-13 17:14:35,194 (trainer:732) INFO: 45epoch:train:4701-4800batch: iter_time=1.380e-04, forward_time=0.148, loss_ctc=76.055, loss_att=59.898, acc=0.712, loss=64.745, backward_time=1.032, grad_norm=120.870, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.362e-05, train_time=2.728 +[gpub002:0/64] 2023-07-13 17:16:50,801 (trainer:732) INFO: 45epoch:train:4801-4900batch: iter_time=1.407e-04, forward_time=0.146, loss_ctc=62.951, loss_att=45.591, acc=0.716, loss=50.799, backward_time=1.027, grad_norm=117.396, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.362e-05, train_time=2.712 +[gpub002:0/64] 2023-07-13 17:19:06,572 (trainer:732) INFO: 45epoch:train:4901-5000batch: iter_time=1.073e-04, forward_time=0.145, loss_ctc=68.808, loss_att=51.751, acc=0.704, loss=56.868, backward_time=1.028, grad_norm=136.363, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.361e-05, train_time=2.715 +[gpub002:0/64] 2023-07-13 17:19:21,389 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub002:0/64] 2023-07-13 17:19:39,673 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 17:19:43,090 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 17:19:43,090 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub002:0/64] 2023-07-13 17:19:43,096 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 17:24:59,179 (trainer:732) INFO: 45epoch:train:5001-5100batch: iter_time=2.009, forward_time=0.147, loss_ctc=73.429, loss_att=58.346, acc=0.701, loss=62.871, backward_time=1.048, grad_norm=125.174, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.360e-05, train_time=7.052 +[gpub002:0/64] 2023-07-13 17:27:16,621 (trainer:732) INFO: 45epoch:train:5101-5200batch: iter_time=1.144e-04, forward_time=0.145, loss_ctc=69.877, loss_att=49.329, acc=0.732, loss=55.493, backward_time=1.030, grad_norm=121.180, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.360e-05, train_time=2.749 +[gpub002:0/64] 2023-07-13 17:29:32,165 (trainer:732) INFO: 45epoch:train:5201-5300batch: iter_time=1.158e-04, forward_time=0.145, loss_ctc=72.769, loss_att=55.528, acc=0.703, loss=60.701, backward_time=1.026, grad_norm=161.988, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.359e-05, train_time=2.711 +[gpub002:0/64] 
2023-07-13 17:31:47,590 (trainer:732) INFO: 45epoch:train:5301-5400batch: iter_time=1.207e-04, forward_time=0.144, loss_ctc=71.449, loss_att=55.740, acc=0.713, loss=60.453, backward_time=1.025, grad_norm=134.067, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.359e-05, train_time=2.708 +[gpub002:0/64] 2023-07-13 17:34:03,500 (trainer:732) INFO: 45epoch:train:5401-5500batch: iter_time=1.115e-04, forward_time=0.145, loss_ctc=63.744, loss_att=47.642, acc=0.716, loss=52.473, backward_time=1.028, grad_norm=120.630, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.358e-05, train_time=2.718 +[gpub002:0/64] 2023-07-13 17:36:19,787 (trainer:732) INFO: 45epoch:train:5501-5600batch: iter_time=1.088e-04, forward_time=0.145, loss_ctc=80.084, loss_att=68.051, acc=0.690, loss=71.661, backward_time=1.028, grad_norm=136.204, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.357e-05, train_time=2.726 +[gpub002:0/64] 2023-07-13 17:38:35,468 (trainer:732) INFO: 45epoch:train:5601-5700batch: iter_time=1.136e-04, forward_time=0.145, loss_ctc=73.364, loss_att=52.280, acc=0.718, loss=58.605, backward_time=1.026, grad_norm=133.967, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.357e-05, train_time=2.713 +[gpub002:0/64] 2023-07-13 17:40:50,747 (trainer:732) INFO: 45epoch:train:5701-5800batch: iter_time=1.299e-04, forward_time=0.145, loss_ctc=64.239, loss_att=50.021, acc=0.693, loss=54.286, backward_time=1.026, grad_norm=113.559, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.356e-05, train_time=2.705 +[gpub002:0/64] 2023-07-13 17:41:39,369 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub002:0/64] 2023-07-13 17:41:57,470 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-13 17:42:01,065 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-13 17:42:01,065 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub002:0/64] 2023-07-13 17:42:01,071 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-13 17:49:41,785 (trainer:732) INFO: 45epoch:train:5801-5900batch: iter_time=1.389, forward_time=0.199, loss_ctc=66.566, loss_att=47.542, acc=0.725, loss=53.249, backward_time=1.047, grad_norm=123.650, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.185, optim0_lr0=5.355e-05, train_time=10.620 +[gpub002:0/64] 2023-07-13 17:51:58,518 (trainer:732) INFO: 45epoch:train:5901-6000batch: iter_time=1.269e-04, forward_time=0.148, loss_ctc=76.867, loss_att=60.070, acc=0.716, loss=65.109, backward_time=1.030, grad_norm=146.171, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.355e-05, train_time=2.735 +[gpub002:0/64] 2023-07-13 17:54:14,647 (trainer:732) INFO: 45epoch:train:6001-6100batch: iter_time=1.264e-04, forward_time=0.146, loss_ctc=73.164, loss_att=52.146, acc=0.712, loss=58.451, backward_time=1.031, 
grad_norm=118.081, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.354e-05, train_time=2.722 +[gpub002:0/64] 2023-07-13 17:56:25,548 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub002:0/64] 2023-07-13 17:56:30,992 (trainer:732) INFO: 45epoch:train:6101-6200batch: iter_time=1.360e-04, forward_time=0.147, loss_ctc=72.297, loss_att=55.985, acc=0.727, loss=60.878, backward_time=1.033, grad_norm=127.067, clip=100.000, loss_scale=6.358e+32, optim_step_time=0.182, optim0_lr0=5.354e-05, train_time=2.727 +[gpub002:0/64] 2023-07-13 17:58:48,413 (trainer:732) INFO: 45epoch:train:6201-6300batch: iter_time=1.208e-04, forward_time=0.146, loss_ctc=62.624, loss_att=46.194, acc=0.728, loss=51.123, backward_time=1.042, grad_norm=124.806, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.353e-05, train_time=2.748 +[gpub002:0/64] 2023-07-13 18:01:05,345 (trainer:732) INFO: 45epoch:train:6301-6400batch: iter_time=9.910e-05, forward_time=0.146, loss_ctc=75.500, loss_att=57.283, acc=0.714, loss=62.749, backward_time=1.031, grad_norm=118.320, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.352e-05, train_time=2.738 +[gpub002:0/64] 2023-07-13 18:03:22,316 (trainer:732) INFO: 45epoch:train:6401-6500batch: iter_time=1.146e-04, forward_time=0.145, loss_ctc=70.999, loss_att=56.502, acc=0.721, loss=60.851, backward_time=1.031, grad_norm=119.958, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.352e-05, train_time=2.739 +[gpub002:0/64] 2023-07-13 18:05:38,177 (trainer:732) INFO: 45epoch:train:6501-6600batch: iter_time=1.006e-04, forward_time=0.144, loss_ctc=66.854, loss_att=49.259, acc=0.703, loss=54.538, backward_time=1.027, grad_norm=102.459, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.351e-05, train_time=2.717 +[gpub002:0/64] 2023-07-13 18:07:21,216 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
+[gpub002:0/64] 2023-07-13 18:07:39,658 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 18:07:43,082 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 18:07:43,082 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub002:0/64] 2023-07-13 18:07:43,088 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 18:12:41,336 (trainer:732) INFO: 45epoch:train:6601-6700batch: iter_time=1.633, forward_time=0.145, loss_ctc=73.582, loss_att=56.482, acc=0.708, loss=61.612, backward_time=1.040, grad_norm=136.038, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.351e-05, train_time=8.463
+[gpub002:0/64] 2023-07-13 18:14:58,720 (trainer:732) INFO: 45epoch:train:6701-6800batch: iter_time=1.127e-04, forward_time=0.145, loss_ctc=70.722, loss_att=52.661, acc=0.720, loss=58.080, backward_time=1.033, grad_norm=127.003, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.350e-05, train_time=2.747
+[gpub002:0/64] 2023-07-13 18:17:17,712 (trainer:732) INFO: 45epoch:train:6801-6900batch: iter_time=1.159e-04, forward_time=0.145, loss_ctc=73.147, loss_att=53.137, acc=0.710, loss=59.140, backward_time=1.047, grad_norm=130.410, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.349e-05, train_time=2.780
+[gpub002:0/64] 2023-07-13 18:19:40,571 (trainer:732) INFO: 45epoch:train:6901-7000batch: iter_time=1.008e-04, forward_time=0.145, loss_ctc=70.596, loss_att=56.317, acc=0.715, loss=60.601, backward_time=1.032, grad_norm=138.486, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.349e-05, train_time=2.857
+[gpub002:0/64] 2023-07-13 18:22:00,623 (trainer:732) INFO: 45epoch:train:7001-7100batch: iter_time=1.223e-04, forward_time=0.146, loss_ctc=63.705, loss_att=46.602, acc=0.721, loss=51.733, backward_time=1.033, grad_norm=101.592, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.348e-05, train_time=2.801
+[gpub002:0/64] 2023-07-13 18:24:43,695 (trainer:732) INFO: 45epoch:train:7101-7200batch: iter_time=1.209e-04, forward_time=0.145, loss_ctc=78.651, loss_att=65.443, acc=0.692, loss=69.405, backward_time=1.056, grad_norm=153.875, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.348e-05, train_time=3.261
+[gpub002:0/64] 2023-07-13 18:27:00,050 (trainer:732) INFO: 45epoch:train:7201-7300batch: iter_time=1.117e-04, forward_time=0.146, loss_ctc=69.650, loss_att=51.477, acc=0.714, loss=56.929, backward_time=1.030, grad_norm=126.353, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.347e-05, train_time=2.727
+[gpub002:0/64] 2023-07-13 18:29:15,908 (trainer:732) INFO: 45epoch:train:7301-7400batch: iter_time=1.172e-04, forward_time=0.145, loss_ctc=64.906, loss_att=48.559, acc=0.703, loss=53.463, backward_time=1.030, grad_norm=131.884, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.346e-05, train_time=2.717
+[gpub002:0/64] 2023-07-13 18:31:31,376 (trainer:732) INFO: 45epoch:train:7401-7500batch: iter_time=1.120e-04, forward_time=0.145, loss_ctc=66.285, loss_att=49.804, acc=0.719, loss=54.748, backward_time=1.027, grad_norm=119.238, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.346e-05, train_time=2.709
+[gpub002:0/64] 2023-07-13 18:31:39,809 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub002:0/64] 2023-07-13 18:31:58,133 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 18:32:01,554 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 18:32:01,554 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub002:0/64] 2023-07-13 18:32:01,560 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 18:38:55,570 (trainer:732) INFO: 45epoch:train:7501-7600batch: iter_time=2.968, forward_time=0.205, loss_ctc=74.821, loss_att=58.310, acc=0.701, loss=63.263, backward_time=1.049, grad_norm=140.580, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.345e-05, train_time=8.883
+[gpub002:0/64] 2023-07-13 18:41:12,994 (trainer:732) INFO: 45epoch:train:7601-7700batch: iter_time=1.401e-04, forward_time=0.146, loss_ctc=70.499, loss_att=49.199, acc=0.733, loss=55.589, backward_time=1.030, grad_norm=115.910, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.344e-05, train_time=2.749
+[gpub002:0/64] 2023-07-13 18:43:29,468 (trainer:732) INFO: 45epoch:train:7701-7800batch: iter_time=1.289e-04, forward_time=0.146, loss_ctc=72.586, loss_att=55.027, acc=0.709, loss=60.295, backward_time=1.027, grad_norm=112.394, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.344e-05, train_time=2.729
+[gpub002:0/64] 2023-07-13 18:45:45,793 (trainer:732) INFO: 45epoch:train:7801-7900batch: iter_time=1.293e-04, forward_time=0.144, loss_ctc=72.517, loss_att=54.985, acc=0.716, loss=60.244, backward_time=1.025, grad_norm=117.890, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.343e-05, train_time=2.726
+[gpub002:0/64] 2023-07-13 18:48:24,077 (trainer:732) INFO: 45epoch:train:7901-8000batch: iter_time=1.442e-04, forward_time=0.145, loss_ctc=63.111, loss_att=46.756, acc=0.716, loss=51.662, backward_time=1.057, grad_norm=120.211, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.343e-05, train_time=3.165
+[gpub002:0/64] 2023-07-13 18:50:40,521 (trainer:732) INFO: 45epoch:train:8001-8100batch: iter_time=1.278e-04, forward_time=0.147, loss_ctc=78.732, loss_att=66.222, acc=0.695, loss=69.975, backward_time=1.031, grad_norm=170.247, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.342e-05, train_time=2.729
+[gpub002:0/64] 2023-07-13 18:52:56,982 (trainer:732) INFO: 45epoch:train:8101-8200batch: iter_time=1.082e-04, forward_time=0.146, loss_ctc=71.237, loss_att=50.828, acc=0.721, loss=56.951, backward_time=1.031, grad_norm=144.493, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.341e-05, train_time=2.729
+[gpub002:0/64] 2023-07-13 18:55:12,607 (trainer:732) INFO: 45epoch:train:8201-8300batch: iter_time=1.288e-04, forward_time=0.145, loss_ctc=63.342, loss_att=47.314, acc=0.705, loss=52.123, backward_time=1.027, grad_norm=120.140, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.341e-05, train_time=2.712
+[gpub002:0/64] 2023-07-13 18:56:02,679 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub002:0/64] 2023-07-13 18:56:21,043 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 18:56:24,475 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 18:56:24,475 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpub002:0/64] 2023-07-13 18:56:24,481 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 19:01:54,411 (trainer:732) INFO: 45epoch:train:8301-8400batch: iter_time=1.403, forward_time=0.145, loss_ctc=67.802, loss_att=47.923, acc=0.722, loss=53.886, backward_time=1.041, grad_norm=103.039, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.340e-05, train_time=8.036
+[gpub002:0/64] 2023-07-13 19:04:12,062 (trainer:732) INFO: 45epoch:train:8401-8500batch: iter_time=1.296e-04, forward_time=0.144, loss_ctc=77.578, loss_att=59.511, acc=0.713, loss=64.931, backward_time=1.028, grad_norm=113.538, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.340e-05, train_time=2.753
+[gpub002:0/64] 2023-07-13 19:06:36,114 (trainer:732) INFO: 45epoch:train:8501-8600batch: iter_time=1.099e-04, forward_time=0.144, loss_ctc=72.515, loss_att=51.973, acc=0.709, loss=58.136, backward_time=1.037, grad_norm=112.074, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.339e-05, train_time=2.881
+[gpub002:0/64] 2023-07-13 19:08:58,016 (trainer:732) INFO: 45epoch:train:8601-8700batch: iter_time=1.206e-04, forward_time=0.145, loss_ctc=70.363, loss_att=55.476, acc=0.722, loss=59.942, backward_time=1.052, grad_norm=139.344, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.338e-05, train_time=2.838
+[gpub002:0/64] 2023-07-13 19:11:23,653 (trainer:732) INFO: 45epoch:train:8701-8800batch: iter_time=1.251e-04, forward_time=0.145, loss_ctc=61.325, loss_att=45.766, acc=0.718, loss=50.433, backward_time=1.067, grad_norm=128.516, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.338e-05, train_time=2.913
+[gpub002:0/64] 2023-07-13 19:13:45,579 (trainer:732) INFO: 45epoch:train:8801-8900batch: iter_time=1.264e-04, forward_time=0.146, loss_ctc=75.654, loss_att=57.454, acc=0.712, loss=62.914, backward_time=1.033, grad_norm=163.219, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.337e-05, train_time=2.838
+[gpub002:0/64] 2023-07-13 19:16:01,117 (trainer:732) INFO: 45epoch:train:8901-9000batch: iter_time=1.248e-04, forward_time=0.145, loss_ctc=71.877, loss_att=58.040, acc=0.706, loss=62.191, backward_time=1.026, grad_norm=150.120, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.337e-05, train_time=2.711
+[gpub002:0/64] 2023-07-13 19:18:23,985 (trainer:732) INFO: 45epoch:train:9001-9100batch: iter_time=1.213e-04, forward_time=0.145, loss_ctc=65.441, loss_att=49.344, acc=0.702, loss=54.173, backward_time=1.036, grad_norm=131.564, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.336e-05, train_time=2.857
+[gpub002:0/64] 2023-07-13 19:20:22,038 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub002:0/64] 2023-07-13 19:20:40,563 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 19:20:44,273 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 19:20:44,273 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpub002:0/64] 2023-07-13 19:20:44,279 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 19:26:50,491 (trainer:732) INFO: 45epoch:train:9101-9200batch: iter_time=1.709, forward_time=0.179, loss_ctc=67.413, loss_att=51.765, acc=0.712, loss=56.459, backward_time=1.050, grad_norm=122.252, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.335e-05, train_time=10.130
+[gpub002:0/64] 2023-07-13 19:29:07,652 (trainer:732) INFO: 45epoch:train:9201-9300batch: iter_time=1.127e-04, forward_time=0.146, loss_ctc=76.983, loss_att=59.756, acc=0.700, loss=64.924, backward_time=1.032, grad_norm=139.788, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.335e-05, train_time=2.742
+[gpub002:0/64] 2023-07-13 19:31:25,100 (trainer:732) INFO: 45epoch:train:9301-9400batch: iter_time=9.813e-05, forward_time=0.145, loss_ctc=72.296, loss_att=47.126, acc=0.734, loss=54.677, backward_time=1.028, grad_norm=113.311, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.334e-05, train_time=2.750
+[gpub002:0/64] 2023-07-13 19:33:41,318 (trainer:732) INFO: 45epoch:train:9401-9500batch: iter_time=9.632e-05, forward_time=0.145, loss_ctc=71.884, loss_att=54.495, acc=0.719, loss=59.712, backward_time=1.027, grad_norm=117.673, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.334e-05, train_time=2.724
+[gpub002:0/64] 2023-07-13 19:35:57,136 (trainer:732) INFO: 45epoch:train:9501-9600batch: iter_time=1.092e-04, forward_time=0.145, loss_ctc=64.752, loss_att=48.917, acc=0.719, loss=53.668, backward_time=1.026, grad_norm=117.335, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.333e-05, train_time=2.716
+[gpub002:0/64] 2023-07-13 19:38:13,202 (trainer:732) INFO: 45epoch:train:9601-9700batch: iter_time=1.130e-04, forward_time=0.145, loss_ctc=71.752, loss_att=55.035, acc=0.712, loss=60.050, backward_time=1.028, grad_norm=125.109, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.332e-05, train_time=2.721
+[gpub002:0/64] 2023-07-13 19:40:28,939 (trainer:732) INFO: 45epoch:train:9701-9800batch: iter_time=1.014e-04, forward_time=0.145, loss_ctc=75.944, loss_att=60.847, acc=0.702, loss=65.376, backward_time=1.027, grad_norm=149.069, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.332e-05, train_time=2.715
+[gpub002:0/64] 2023-07-13 19:42:44,392 (trainer:732) INFO: 45epoch:train:9801-9900batch: iter_time=1.022e-04, forward_time=0.143, loss_ctc=61.928, loss_att=45.638, acc=0.717, loss=50.525, backward_time=1.025, grad_norm=106.833, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.331e-05, train_time=2.709
+[gpub002:0/64] 2023-07-13 19:44:59,782 (trainer:732) INFO: 45epoch:train:9901-10000batch: iter_time=9.628e-05, forward_time=0.144, loss_ctc=68.171, loss_att=50.374, acc=0.709, loss=55.713, backward_time=1.024, grad_norm=116.675, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.330e-05, train_time=2.708
+[gpub002:0/64] 2023-07-13 19:59:12,558 (trainer:338) INFO: 45epoch results: [train] iter_time=0.241, forward_time=0.153, loss_ctc=71.101, loss_att=53.886, acc=0.712, loss=59.051, backward_time=1.034, grad_norm=125.414, clip=100.000, loss_scale=3.957e+32, optim_step_time=0.183, optim0_lr0=5.361e-05, train_time=3.486, time=4 hours, 50 minutes and 52.55 seconds, total_count=420000, gpu_max_cached_mem_GB=37.574, [valid] loss_ctc=42.917, cer_ctc=0.255, loss_att=35.722, acc=0.704, cer=0.334, wer=0.986, loss=37.881, time=7 minutes and 50.24 seconds, total_count=43010, gpu_max_cached_mem_GB=37.574, [att_plot] time=5 minutes and 59.73 seconds, total_count=0, gpu_max_cached_mem_GB=37.574
+[gpub002:0/64] 2023-07-13 19:59:28,695 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub002:0/64] 2023-07-13 19:59:28,788 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/29epoch.pth, exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/40epoch.pth
+[gpub002:0/64] 2023-07-13 19:59:28,788 (trainer:272) INFO: 46/50epoch started. Estimated time to finish: 1 day, 1 hour and 33 minutes
+[gpub002:0/64] 2023-07-13 19:59:28,792 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub002:0/64] 2023-07-13 19:59:46,914 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 19:59:50,345 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 19:59:50,345 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub002:0/64] 2023-07-13 19:59:50,352 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 20:04:40,186 (trainer:732) INFO: 46epoch:train:1-100batch: iter_time=1.647, forward_time=0.188, loss_ctc=65.883, loss_att=56.587, acc=0.712, loss=59.376, backward_time=1.048, grad_norm=121.603, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.330e-05, train_time=6.227
+[gpub002:0/64] 2023-07-13 20:06:57,011 (trainer:732) INFO: 46epoch:train:101-200batch: iter_time=1.304e-04, forward_time=0.146, loss_ctc=73.119, loss_att=52.389, acc=0.708, loss=58.608, backward_time=1.033, grad_norm=126.577, clip=100.000, loss_scale=3.375e+32, optim_step_time=0.182, optim0_lr0=5.329e-05, train_time=2.737
+[gpub002:0/64] 2023-07-13 20:09:13,490 (trainer:732) INFO: 46epoch:train:201-300batch: iter_time=1.219e-04, forward_time=0.144, loss_ctc=70.101, loss_att=49.203, acc=0.726, loss=55.472, backward_time=1.027, grad_norm=136.978, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.329e-05, train_time=2.729
+[gpub002:0/64] 2023-07-13 20:11:29,153 (trainer:732) INFO: 46epoch:train:301-400batch: iter_time=1.195e-04, forward_time=0.145, loss_ctc=67.230, loss_att=54.634, acc=0.706, loss=58.413, backward_time=1.026, grad_norm=117.807, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.328e-05, train_time=2.713
+[gpub002:0/64] 2023-07-13 20:13:47,110 (trainer:732) INFO: 46epoch:train:401-500batch: iter_time=1.302e-04, forward_time=0.144, loss_ctc=68.084, loss_att=51.893, acc=0.716, loss=56.751, backward_time=1.029, grad_norm=120.025, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.327e-05, train_time=2.759
+[gpub002:0/64] 2023-07-13 20:16:12,488 (trainer:732) INFO: 46epoch:train:501-600batch: iter_time=1.310e-04, forward_time=0.144, loss_ctc=81.257, loss_att=58.875, acc=0.709, loss=65.590, backward_time=1.037, grad_norm=140.480, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.327e-05, train_time=2.907
+[gpub002:0/64] 2023-07-13 20:18:37,169 (trainer:732) INFO: 46epoch:train:601-700batch: iter_time=1.257e-04, forward_time=0.145, loss_ctc=62.945, loss_att=44.662, acc=0.715, loss=50.147, backward_time=1.036, grad_norm=124.728, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.326e-05, train_time=2.893
+[gpub002:0/64] 2023-07-13 20:21:00,820 (trainer:732) INFO: 46epoch:train:701-800batch: iter_time=1.192e-04, forward_time=0.145, loss_ctc=73.224, loss_att=50.781, acc=0.730, loss=57.514, backward_time=1.034, grad_norm=141.813, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.326e-05, train_time=2.873
+[gpub002:0/64] 2023-07-13 20:22:00,295 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpub002:0/64] 2023-07-13 20:22:18,119 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 20:22:21,538 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 20:22:21,538 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub002:0/64] 2023-07-13 20:22:21,544 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 20:28:54,861 (trainer:732) INFO: 46epoch:train:801-900batch: iter_time=3.233, forward_time=0.196, loss_ctc=66.161, loss_att=51.935, acc=0.720, loss=56.203, backward_time=1.045, grad_norm=126.426, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.190, optim0_lr0=5.325e-05, train_time=9.480
+[gpub002:0/64] 2023-07-13 20:31:12,985 (trainer:732) INFO: 46epoch:train:901-1000batch: iter_time=0.001, forward_time=0.153, loss_ctc=74.152, loss_att=55.993, acc=0.706, loss=61.441, backward_time=1.032, grad_norm=120.946, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.324e-05, train_time=2.763
+[gpub002:0/64] 2023-07-13 20:33:29,468 (trainer:732) INFO: 46epoch:train:1001-1100batch: iter_time=0.001, forward_time=0.146, loss_ctc=65.898, loss_att=48.417, acc=0.733, loss=53.661, backward_time=1.032, grad_norm=117.557, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.324e-05, train_time=2.729
+[gpub002:0/64] 2023-07-13 20:36:03,141 (trainer:732) INFO: 46epoch:train:1101-1200batch: iter_time=2.935e-04, forward_time=0.273, loss_ctc=68.912, loss_att=53.279, acc=0.714, loss=57.969, backward_time=1.053, grad_norm=151.138, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.189, optim0_lr0=5.323e-05, train_time=3.072
+[gpub002:0/64] 2023-07-13 20:38:19,536 (trainer:732) INFO: 46epoch:train:1201-1300batch: iter_time=1.362e-04, forward_time=0.146, loss_ctc=70.800, loss_att=51.181, acc=0.720, loss=57.067, backward_time=1.029, grad_norm=152.229, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.323e-05, train_time=2.729
+[gpub002:0/64] 2023-07-13 20:40:36,527 (trainer:732) INFO: 46epoch:train:1301-1400batch: iter_time=1.269e-04, forward_time=0.150, loss_ctc=73.810, loss_att=54.374, acc=0.723, loss=60.205, backward_time=1.030, grad_norm=167.798, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.322e-05, train_time=2.740
+[gpub002:0/64] 2023-07-13 20:42:52,466 (trainer:732) INFO: 46epoch:train:1401-1500batch: iter_time=1.267e-04, forward_time=0.146, loss_ctc=68.079, loss_att=49.216, acc=0.708, loss=54.875, backward_time=1.026, grad_norm=209.029, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.321e-05, train_time=2.719
+[gpub002:0/64] 2023-07-13 20:45:08,125 (trainer:732) INFO: 46epoch:train:1501-1600batch: iter_time=1.277e-04, forward_time=0.144, loss_ctc=69.590, loss_att=48.587, acc=0.725, loss=54.888, backward_time=1.026, grad_norm=143.890, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.181, optim0_lr0=5.321e-05, train_time=2.713
+[gpub002:0/64] 2023-07-13 20:46:58,016 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub002:0/64] 2023-07-13 20:47:16,447 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 20:47:19,843 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 20:47:19,843 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpub002:0/64] 2023-07-13 20:47:19,979 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 20:53:32,464 (trainer:732) INFO: 46epoch:train:1601-1700batch: iter_time=3.523, forward_time=0.178, loss_ctc=68.290, loss_att=51.711, acc=0.723, loss=56.685, backward_time=1.040, grad_norm=114.130, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.184, optim0_lr0=5.320e-05, train_time=10.086
+[gpub002:0/64] 2023-07-13 20:55:49,050 (trainer:732) INFO: 46epoch:train:1701-1800batch: iter_time=1.217e-04, forward_time=0.145, loss_ctc=69.254, loss_att=57.546, acc=0.707, loss=61.058, backward_time=1.030, grad_norm=147.850, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.320e-05, train_time=2.732
+[gpub002:0/64] 2023-07-13 20:58:04,545 (trainer:732) INFO: 46epoch:train:1801-1900batch: iter_time=1.289e-04, forward_time=0.144, loss_ctc=67.059, loss_att=46.493, acc=0.719, loss=52.663, backward_time=1.026, grad_norm=125.550, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.319e-05, train_time=2.710
+[gpub002:0/64] 2023-07-13 21:00:20,791 (trainer:732) INFO: 46epoch:train:1901-2000batch: iter_time=1.198e-04, forward_time=0.145, loss_ctc=67.667, loss_att=50.905, acc=0.727, loss=55.934, backward_time=1.029, grad_norm=127.276, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.318e-05, train_time=2.725
+[gpub002:0/64] 2023-07-13 21:02:36,612 (trainer:732) INFO: 46epoch:train:2001-2100batch: iter_time=1.225e-04, forward_time=0.146, loss_ctc=67.126, loss_att=48.506, acc=0.720, loss=54.092, backward_time=1.028, grad_norm=118.373, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.318e-05, train_time=2.716
+[gpub002:0/64] 2023-07-13 21:04:53,111 (trainer:732) INFO: 46epoch:train:2101-2200batch: iter_time=1.325e-04, forward_time=0.148, loss_ctc=72.830, loss_att=55.843, acc=0.709, loss=60.939, backward_time=1.030, grad_norm=138.565, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.317e-05, train_time=2.729
+[gpub002:0/64] 2023-07-13 21:07:08,871 (trainer:732) INFO: 46epoch:train:2201-2300batch: iter_time=1.351e-04, forward_time=0.145, loss_ctc=69.103, loss_att=52.736, acc=0.704, loss=57.646, backward_time=1.028, grad_norm=122.386, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.317e-05, train_time=2.716
+[gpub002:0/64] 2023-07-13 21:09:24,380 (trainer:732) INFO: 46epoch:train:2301-2400batch: iter_time=1.543e-04, forward_time=0.145, loss_ctc=63.429, loss_att=44.374, acc=0.725, loss=50.090, backward_time=1.027, grad_norm=140.538, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.316e-05, train_time=2.710
+[gpub002:0/64] 2023-07-13 21:11:48,859 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub002:0/64] 2023-07-13 21:12:06,991 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 21:12:10,395 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 21:12:10,395 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub002:0/64] 2023-07-13 21:12:10,401 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 21:17:51,096 (trainer:732) INFO: 46epoch:train:2401-2500batch: iter_time=1.340, forward_time=0.145, loss_ctc=74.058, loss_att=50.477, acc=0.726, loss=57.551, backward_time=1.066, grad_norm=158.986, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.315e-05, train_time=10.134
+[gpub002:0/64] 2023-07-13 21:20:12,148 (trainer:732) INFO: 46epoch:train:2501-2600batch: iter_time=1.226e-04, forward_time=0.145, loss_ctc=73.951, loss_att=59.611, acc=0.702, loss=63.913, backward_time=1.038, grad_norm=135.632, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.315e-05, train_time=2.821
+[gpub002:0/64] 2023-07-13 21:22:28,950 (trainer:732) INFO: 46epoch:train:2601-2700batch: iter_time=1.170e-04, forward_time=0.143, loss_ctc=65.472, loss_att=47.308, acc=0.721, loss=52.757, backward_time=1.030, grad_norm=119.313, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.314e-05, train_time=2.736
+[gpub002:0/64] 2023-07-13 21:24:44,625 (trainer:732) INFO: 46epoch:train:2701-2800batch: iter_time=1.242e-04, forward_time=0.145, loss_ctc=70.360, loss_att=52.584, acc=0.716, loss=57.916, backward_time=1.028, grad_norm=125.573, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.314e-05, train_time=2.713
+[gpub002:0/64] 2023-07-13 21:27:00,394 (trainer:732) INFO: 46epoch:train:2801-2900batch: iter_time=1.253e-04, forward_time=0.146, loss_ctc=66.237, loss_att=48.990, acc=0.715, loss=54.164, backward_time=1.028, grad_norm=119.418, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.313e-05, train_time=2.715
+[gpub002:0/64] 2023-07-13 21:29:16,078 (trainer:732) INFO: 46epoch:train:2901-3000batch: iter_time=1.298e-04, forward_time=0.146, loss_ctc=69.138, loss_att=52.011, acc=0.715, loss=57.149, backward_time=1.028, grad_norm=140.082, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.312e-05, train_time=2.713
+[gpub002:0/64] 2023-07-13 21:31:31,702 (trainer:732) INFO: 46epoch:train:3001-3100batch: iter_time=1.423e-04, forward_time=0.145, loss_ctc=69.682, loss_att=52.735, acc=0.704, loss=57.819, backward_time=1.028, grad_norm=112.062, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.312e-05, train_time=2.712
+[gpub002:0/64] 2023-07-13 21:33:47,412 (trainer:732) INFO: 46epoch:train:3101-3200batch: iter_time=1.092e-04, forward_time=0.146, loss_ctc=67.778, loss_att=47.656, acc=0.722, loss=53.693, backward_time=1.027, grad_norm=128.979, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.311e-05, train_time=2.714
+[gpub002:0/64] 2023-07-13 21:36:02,959 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub002:0/64] 2023-07-13 21:36:02,969 (trainer:732) INFO: 46epoch:train:3201-3300batch: iter_time=1.148e-04, forward_time=0.146, loss_ctc=74.814, loss_att=52.797, acc=0.718, loss=59.402, backward_time=1.028, grad_norm=125.040, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.311e-05, train_time=2.711
+[gpub002:0/64] 2023-07-13 21:36:49,099 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub002:0/64] 2023-07-13 21:37:07,122 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 21:37:10,547 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 21:37:10,547 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub002:0/64] 2023-07-13 21:37:10,663 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 21:42:10,676 (trainer:732) INFO: 46epoch:train:3301-3400batch: iter_time=1.578, forward_time=0.207, loss_ctc=67.011, loss_att=54.152, acc=0.720, loss=58.010, backward_time=1.045, grad_norm=167.072, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.310e-05, train_time=7.354
+[gpub002:0/64] 2023-07-13 21:44:26,985 (trainer:732) INFO: 46epoch:train:3401-3500batch: iter_time=1.279e-04, forward_time=0.146, loss_ctc=73.361, loss_att=54.970, acc=0.712, loss=60.487, backward_time=1.029, grad_norm=126.355, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.309e-05, train_time=2.726
+[gpub002:0/64] 2023-07-13 21:46:42,557 (trainer:732) INFO: 46epoch:train:3501-3600batch: iter_time=1.298e-04, forward_time=0.145, loss_ctc=64.685, loss_att=47.711, acc=0.736, loss=52.804, backward_time=1.026, grad_norm=107.177, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.309e-05, train_time=2.711
+[gpub002:0/64] 2023-07-13 21:48:58,259 (trainer:732) INFO: 46epoch:train:3601-3700batch: iter_time=1.307e-04, forward_time=0.146, loss_ctc=68.193, loss_att=53.136, acc=0.717, loss=57.653, backward_time=1.026, grad_norm=121.607, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.308e-05, train_time=2.714
+[gpub002:0/64] 2023-07-13 21:51:15,506 (trainer:732) INFO: 46epoch:train:3701-3800batch: iter_time=1.059e-04, forward_time=0.147, loss_ctc=69.434, loss_att=50.735, acc=0.724, loss=56.345, backward_time=1.030, grad_norm=108.644, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.308e-05, train_time=2.744
+[gpub002:0/64] 2023-07-13 21:53:38,632 (trainer:732) INFO: 46epoch:train:3801-3900batch: iter_time=1.193e-04, forward_time=0.157, loss_ctc=71.575, loss_att=53.589, acc=0.723, loss=58.985, backward_time=1.046, grad_norm=142.656, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.307e-05, train_time=2.863
+[gpub002:0/64] 2023-07-13 21:55:57,708 (trainer:732) INFO: 46epoch:train:3901-4000batch: iter_time=1.164e-04, forward_time=0.146, loss_ctc=67.232, loss_att=49.694, acc=0.711, loss=54.956, backward_time=1.033, grad_norm=118.653, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.306e-05, train_time=2.781
+[gpub002:0/64] 2023-07-13 21:58:13,850 (trainer:732) INFO: 46epoch:train:4001-4100batch: iter_time=1.079e-04, forward_time=0.145, loss_ctc=69.507, loss_att=48.626, acc=0.726, loss=54.891, backward_time=1.030, grad_norm=115.628, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.306e-05, train_time=2.723
+[gpub002:0/64] 2023-07-13 22:00:07,007 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub002:0/64] 2023-07-13 22:00:25,302 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 22:00:28,740 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 22:00:28,740 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub002:0/64] 2023-07-13 22:00:28,746 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 22:06:11,914 (trainer:732) INFO: 46epoch:train:4101-4200batch: iter_time=3.280, forward_time=0.182, loss_ctc=67.564, loss_att=50.602, acc=0.727, loss=55.691, backward_time=1.048, grad_norm=124.825, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.305e-05, train_time=9.561
+[gpub002:0/64] 2023-07-13 22:08:28,453 (trainer:732) INFO: 46epoch:train:4201-4300batch: iter_time=1.204e-04, forward_time=0.144, loss_ctc=69.532, loss_att=59.594, acc=0.704, loss=62.576, backward_time=1.031, grad_norm=120.778, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.305e-05, train_time=2.731
+[gpub002:0/64] 2023-07-13 22:10:44,255 (trainer:732) INFO: 46epoch:train:4301-4400batch: iter_time=1.145e-04, forward_time=0.144, loss_ctc=67.758, loss_att=47.109, acc=0.721, loss=53.304, backward_time=1.027, grad_norm=107.527, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.304e-05, train_time=2.716
+[gpub002:0/64] 2023-07-13 22:12:59,720 (trainer:732) INFO: 46epoch:train:4401-4500batch: iter_time=1.420e-04, forward_time=0.144, loss_ctc=68.105, loss_att=51.162, acc=0.727, loss=56.245, backward_time=1.025, grad_norm=134.544, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.303e-05, train_time=2.709
+[gpub002:0/64] 2023-07-13 22:15:15,435 (trainer:732) INFO: 46epoch:train:4501-4600batch: iter_time=1.536e-04, forward_time=0.146, loss_ctc=67.447, loss_att=49.089, acc=0.720, loss=54.596, backward_time=1.028, grad_norm=123.003, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.303e-05, train_time=2.714
+[gpub002:0/64] 2023-07-13 22:17:31,236 (trainer:732) INFO: 46epoch:train:4601-4700batch: iter_time=1.373e-04, forward_time=0.146, loss_ctc=72.378, loss_att=55.337, acc=0.710, loss=60.450, backward_time=1.028, grad_norm=127.410, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.302e-05, train_time=2.716
+[gpub002:0/64] 2023-07-13 22:19:46,893 (trainer:732) INFO: 46epoch:train:4701-4800batch: iter_time=1.286e-04, forward_time=0.146, loss_ctc=68.304, loss_att=52.814, acc=0.705, loss=57.461, backward_time=1.027, grad_norm=115.040, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.302e-05, train_time=2.713
+[gpub002:0/64] 2023-07-13 22:22:02,247 (trainer:732) INFO: 46epoch:train:4801-4900batch: iter_time=1.396e-04, forward_time=0.145, loss_ctc=62.431, loss_att=43.727, acc=0.729, loss=49.338, backward_time=1.025, grad_norm=120.051, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.301e-05, train_time=2.707
+[gpub002:0/64] 2023-07-13 22:24:18,146 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub002:0/64] 2023-07-13 22:24:36,789 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 22:24:40,257 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 22:24:40,257 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpub002:0/64] 2023-07-13 22:24:40,263 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 22:31:08,903 (trainer:732) INFO: 46epoch:train:4901-5000batch: iter_time=1.286, forward_time=0.146, loss_ctc=71.786, loss_att=49.940, acc=0.728, loss=56.494, backward_time=1.039, grad_norm=110.395, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.300e-05, train_time=10.932
+[gpub002:0/64] 2023-07-13 22:33:27,419 (trainer:732) INFO: 46epoch:train:5001-5100batch: iter_time=1.223e-04, forward_time=0.149, loss_ctc=64.751, loss_att=53.246, acc=0.721, loss=56.697, backward_time=1.037, grad_norm=99.967, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.300e-05, train_time=2.771
+[gpub002:0/64] 2023-07-13 22:35:43,951 (trainer:732) INFO: 46epoch:train:5101-5200batch: iter_time=1.150e-04, forward_time=0.144, loss_ctc=72.158, loss_att=51.378, acc=0.713, loss=57.612, backward_time=1.028, grad_norm=111.287, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.299e-05, train_time=2.730
+[gpub002:0/64] 2023-07-13 22:37:59,756 (trainer:732) INFO: 46epoch:train:5201-5300batch: iter_time=1.149e-04, forward_time=0.145, loss_ctc=67.483, loss_att=47.864, acc=0.735, loss=53.750, backward_time=1.028, grad_norm=125.499, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.299e-05, train_time=2.716
+[gpub002:0/64] 2023-07-13 22:40:23,841 (trainer:732) INFO: 46epoch:train:5301-5400batch: iter_time=1.153e-04, forward_time=0.145, loss_ctc=65.944, loss_att=51.781, acc=0.714, loss=56.030, backward_time=1.042, grad_norm=118.487, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.298e-05, train_time=2.881
+[gpub002:0/64] 2023-07-13 22:42:39,494 (trainer:732) INFO: 46epoch:train:5401-5500batch: iter_time=1.323e-04, forward_time=0.145, loss_ctc=68.524, loss_att=52.761, acc=0.713, loss=57.490, backward_time=1.028, grad_norm=118.967, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.297e-05, train_time=2.713
+[gpub002:0/64] 2023-07-13 22:44:55,465 (trainer:732) INFO: 46epoch:train:5501-5600batch: iter_time=1.347e-04, forward_time=0.146, loss_ctc=75.505, loss_att=56.510, acc=0.706, loss=62.208, backward_time=1.031, grad_norm=124.408, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.297e-05, train_time=2.719
+[gpub002:0/64] 2023-07-13 22:47:10,920 (trainer:732) INFO: 46epoch:train:5601-5700batch: iter_time=1.452e-04, forward_time=0.145, loss_ctc=62.459, loss_att=43.488, acc=0.724, loss=49.180, backward_time=1.026, grad_norm=107.132, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.296e-05, train_time=2.709
+[gpub002:0/64] 2023-07-13 22:49:26,868 (trainer:732) INFO: 46epoch:train:5701-5800batch: iter_time=1.384e-04, forward_time=0.146, loss_ctc=72.636, loss_att=50.457, acc=0.737, loss=57.111, backward_time=1.030, grad_norm=130.378, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.296e-05, train_time=2.719
+[gpub002:0/64] 2023-07-13 22:50:15,326 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub002:0/64] 2023-07-13 22:50:33,588 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 22:50:37,036 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 22:50:37,036 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpub002:0/64] 2023-07-13 22:50:37,042 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 22:58:22,565 (trainer:732) INFO: 46epoch:train:5801-5900batch: iter_time=1.406, forward_time=0.196, loss_ctc=70.088, loss_att=56.947, acc=0.697, loss=60.889, backward_time=1.041, grad_norm=108.830, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.295e-05, train_time=10.713
+[gpub002:0/64] 2023-07-13 23:00:39,434 (trainer:732) INFO: 46epoch:train:5901-6000batch: iter_time=1.184e-04, forward_time=0.146, loss_ctc=69.077, loss_att=49.338, acc=0.718, loss=55.259, backward_time=1.029, grad_norm=127.466, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.295e-05, train_time=2.738
+[gpub002:0/64] 2023-07-13 23:02:54,972 (trainer:732) INFO: 46epoch:train:6001-6100batch: iter_time=1.176e-04, forward_time=0.145, loss_ctc=66.432, loss_att=45.825, acc=0.735, loss=52.007, backward_time=1.025, grad_norm=111.877, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.294e-05, train_time=2.711
+[gpub002:0/64] 2023-07-13 23:05:10,758 (trainer:732) INFO: 46epoch:train:6101-6200batch: iter_time=1.257e-04, forward_time=0.146, loss_ctc=68.644, loss_att=53.212, acc=0.712, loss=57.841, backward_time=1.028, grad_norm=126.634, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.293e-05, train_time=2.716
+[gpub002:0/64] 2023-07-13 23:07:27,287 (trainer:732) INFO: 46epoch:train:6201-6300batch: iter_time=1.196e-04, forward_time=0.145, loss_ctc=68.525, loss_att=54.107, acc=0.705, loss=58.432, backward_time=1.032, grad_norm=130.338, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.293e-05, train_time=2.730
+[gpub002:0/64] 2023-07-13 23:09:42,933 (trainer:732) INFO: 46epoch:train:6301-6400batch: iter_time=1.285e-04, forward_time=0.145, loss_ctc=73.030, loss_att=55.687, acc=0.706, loss=60.890, backward_time=1.028, grad_norm=122.293, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.292e-05, train_time=2.713
+[gpub002:0/64] 2023-07-13 23:11:58,473 (trainer:732) INFO: 46epoch:train:6401-6500batch: iter_time=1.294e-04, forward_time=0.145, loss_ctc=62.489, loss_att=42.039, acc=0.728, loss=48.174, backward_time=1.027, grad_norm=110.857, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.292e-05, train_time=2.711
+[gpub002:0/64] 2023-07-13 23:14:18,395 (trainer:732) INFO: 46epoch:train:6501-6600batch: iter_time=1.265e-04, forward_time=0.145, loss_ctc=70.293, loss_att=50.054, acc=0.730, loss=56.126, backward_time=1.033, grad_norm=116.507, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.291e-05, train_time=2.798
+[gpub002:0/64] 2023-07-13 23:16:08,051 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub002:0/64] 2023-07-13 23:16:26,216 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 23:16:29,635 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 23:16:29,635 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub002:0/64] 2023-07-13 23:16:29,682 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 23:21:49,471 (trainer:732) INFO: 46epoch:train:6601-6700batch: iter_time=3.060, forward_time=0.146, loss_ctc=72.928, loss_att=58.616, acc=0.707, loss=62.910, backward_time=1.044, grad_norm=131.228, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.290e-05, train_time=9.021
+[gpub002:0/64] 2023-07-13 23:24:06,689 (trainer:732) INFO: 46epoch:train:6701-6800batch: iter_time=1.253e-04, forward_time=0.146, loss_ctc=72.161, loss_att=56.066, acc=0.711, loss=60.894, backward_time=1.031, grad_norm=141.373, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.290e-05, train_time=2.744
+[gpub002:0/64] 2023-07-13 23:26:23,450 (trainer:732) INFO: 46epoch:train:6801-6900batch: iter_time=1.254e-04, forward_time=0.147, loss_ctc=66.501, loss_att=47.899, acc=0.733, loss=53.480, backward_time=1.029, grad_norm=134.963, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.289e-05, train_time=2.735
+[gpub002:0/64] 2023-07-13 23:28:50,171 (trainer:732) INFO: 46epoch:train:6901-7000batch: iter_time=1.255e-04, forward_time=0.145, loss_ctc=67.438, loss_att=53.891, acc=0.724, loss=57.955, backward_time=1.034, grad_norm=109.976, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.289e-05, train_time=2.934
+[gpub002:0/64] 2023-07-13 23:31:05,996 (trainer:732) INFO: 46epoch:train:7001-7100batch: iter_time=1.261e-04, forward_time=0.145, loss_ctc=66.567, loss_att=49.018, acc=0.721, loss=54.283, backward_time=1.028, grad_norm=118.826, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.288e-05, train_time=2.716
+[gpub002:0/64] 2023-07-13 23:33:21,934 (trainer:732) INFO: 46epoch:train:7101-7200batch: iter_time=1.132e-04, forward_time=0.144, loss_ctc=73.747, loss_att=54.406, acc=0.722, loss=60.208, backward_time=1.029, grad_norm=143.217, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.287e-05, train_time=2.719
+[gpub002:0/64] 2023-07-13 23:35:37,952 (trainer:732) INFO: 46epoch:train:7201-7300batch: iter_time=1.097e-04, forward_time=0.146, loss_ctc=64.890, loss_att=47.420, acc=0.716, loss=52.661, backward_time=1.031, grad_norm=122.847, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.287e-05, train_time=2.720
+[gpub002:0/64] 2023-07-13 23:37:54,841 (trainer:732) INFO: 46epoch:train:7301-7400batch: iter_time=1.090e-04, forward_time=0.145, loss_ctc=69.972, loss_att=48.798, acc=0.732, loss=55.150, backward_time=1.031, grad_norm=107.927, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.286e-05, train_time=2.738
+[gpub002:0/64] 2023-07-13 23:40:11,039 (trainer:732) INFO: 46epoch:train:7401-7500batch: iter_time=1.048e-04, forward_time=0.146, loss_ctc=72.249, loss_att=55.096, acc=0.722, loss=60.242, backward_time=1.030, grad_norm=116.626, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.286e-05, train_time=2.724
+[gpub002:0/64] 2023-07-13 23:40:12,748 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub002:0/64] 2023-07-13 23:40:30,664 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-13 23:40:34,096 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-13 23:40:34,096 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub002:0/64] 2023-07-13 23:40:34,102 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-13 23:46:45,686 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub002:0/64] 2023-07-13 23:48:38,986 (trainer:732) INFO: 46epoch:train:7501-7600batch: iter_time=1.321, forward_time=0.191, loss_ctc=65.367, loss_att=55.036, acc=0.722, loss=58.135, backward_time=1.048, grad_norm=119.689, clip=100.000, loss_scale=3.775e+32, optim_step_time=0.186, optim0_lr0=5.285e-05, train_time=10.159
+[gpub002:0/64] 2023-07-13 23:50:55,149 (trainer:732) INFO: 46epoch:train:7601-7700batch: iter_time=1.241e-04, forward_time=0.143, loss_ctc=72.022, loss_att=51.304, acc=0.717, loss=57.519, backward_time=1.028, grad_norm=133.215, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.284e-05, train_time=2.723
+[gpub002:0/64] 2023-07-13 23:53:11,294 (trainer:732) INFO: 46epoch:train:7701-7800batch: iter_time=1.063e-04, forward_time=0.144, loss_ctc=67.643, loss_att=47.760, acc=0.736, loss=53.725, backward_time=1.028, grad_norm=130.713, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.284e-05, train_time=2.723
+[gpub002:0/64] 2023-07-13 23:55:28,212 (trainer:732) INFO: 46epoch:train:7801-7900batch: iter_time=1.197e-04, forward_time=0.145, loss_ctc=64.828, loss_att=52.504, acc=0.715, loss=56.201, backward_time=1.026, grad_norm=142.216, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.283e-05, train_time=2.738
+[gpub002:0/64] 2023-07-13 23:57:43,849 (trainer:732) INFO: 46epoch:train:7901-8000batch: iter_time=1.242e-04, forward_time=0.145, loss_ctc=67.229, loss_att=51.224, acc=0.726, loss=56.026, backward_time=1.027, grad_norm=115.103, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.283e-05, train_time=2.713
+[gpub002:0/64] 2023-07-13 23:59:59,805 (trainer:732) INFO: 46epoch:train:8001-8100batch: iter_time=1.158e-04, forward_time=0.145, loss_ctc=73.639, loss_att=56.195, acc=0.717, loss=61.428, backward_time=1.028, grad_norm=125.297, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.282e-05, train_time=2.719
+[gpub002:0/64] 2023-07-14 00:02:20,477 (trainer:732) INFO: 46epoch:train:8101-8200batch: iter_time=1.146e-04, forward_time=0.166, loss_ctc=62.564, loss_att=44.204, acc=0.720, loss=49.712, backward_time=1.037, grad_norm=108.944, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.282e-05, train_time=2.813
+[gpub002:0/64] 2023-07-14 00:04:43,201 (trainer:732) INFO: 46epoch:train:8201-8300batch: iter_time=1.046e-04, forward_time=0.150, loss_ctc=71.384, loss_att=50.456, acc=0.737, loss=56.734, backward_time=1.035, grad_norm=124.346, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.281e-05, train_time=2.854
+[gpub002:0/64] 2023-07-14 00:05:53,094 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub002:0/64] 2023-07-14 00:06:11,216 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 00:06:14,920 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-14 00:06:14,920 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpub002:0/64] 2023-07-14 00:06:14,927 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 00:13:47,812 (trainer:732) INFO: 46epoch:train:8301-8400batch: iter_time=3.876, forward_time=0.196, loss_ctc=69.752, loss_att=55.635, acc=0.712, loss=59.870, backward_time=1.068, grad_norm=118.005, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.186, optim0_lr0=5.280e-05, train_time=10.891
+[gpub002:0/64] 2023-07-14 00:16:05,245 (trainer:732) INFO: 46epoch:train:8401-8500batch: iter_time=1.318e-04, forward_time=0.145, loss_ctc=67.335, loss_att=48.827, acc=0.725, loss=54.379, backward_time=1.027, grad_norm=107.394, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.280e-05, train_time=2.749
+[gpub002:0/64] 2023-07-14 00:18:21,297 (trainer:732) INFO: 46epoch:train:8501-8600batch: iter_time=1.145e-04, forward_time=0.145, loss_ctc=67.132, loss_att=46.641, acc=0.733, loss=52.788, backward_time=1.028, grad_norm=107.093, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.279e-05, train_time=2.721
+[gpub002:0/64] 2023-07-14 00:20:42,957 (trainer:732) INFO: 46epoch:train:8601-8700batch: iter_time=1.030e-04, forward_time=0.145, loss_ctc=66.963, loss_att=52.861, acc=0.720, loss=57.092, backward_time=1.048, grad_norm=130.169, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.279e-05, train_time=2.833
+[gpub002:0/64] 2023-07-14 00:23:00,099 (trainer:732) INFO: 46epoch:train:8701-8800batch: iter_time=1.097e-04, forward_time=0.146, loss_ctc=70.472, loss_att=52.762, acc=0.723, loss=58.075, backward_time=1.032, grad_norm=139.851, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.278e-05, train_time=2.743
+[gpub002:0/64] 2023-07-14 00:25:20,177 (trainer:732) INFO: 46epoch:train:8801-8900batch: iter_time=1.112e-04, forward_time=0.146, loss_ctc=71.545, loss_att=54.284, acc=0.718, loss=59.463, backward_time=1.034, grad_norm=122.838, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.277e-05, train_time=2.801
+[gpub002:0/64] 2023-07-14 00:27:37,496 (trainer:732) INFO: 46epoch:train:8901-9000batch: iter_time=1.124e-04, forward_time=0.147, loss_ctc=62.966, loss_att=43.277, acc=0.727, loss=49.184, backward_time=1.032, grad_norm=110.704, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.277e-05, train_time=2.746
+[gpub002:0/64] 2023-07-14 00:29:53,320 (trainer:732) INFO: 46epoch:train:9001-9100batch: iter_time=1.331e-04, forward_time=0.145, loss_ctc=71.349, loss_att=50.693, acc=0.732, loss=56.890, backward_time=1.028, grad_norm=112.882, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.276e-05, train_time=2.716
+[gpub002:0/64] 2023-07-14 00:31:47,729 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub002:0/64] 2023-07-14 00:32:06,387 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 00:32:09,845 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+  preprocess: )
+[gpub002:0/64] 2023-07-14 00:32:09,845 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub002:0/64] 2023-07-14 00:32:09,852 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 00:38:23,995 (trainer:732) INFO: 46epoch:train:9101-9200batch: iter_time=3.641, forward_time=0.187, loss_ctc=72.415, loss_att=55.674, acc=0.717, loss=60.696, backward_time=1.057, grad_norm=127.477, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.276e-05, train_time=10.213
+[gpub002:0/64] 2023-07-14 00:40:40,945 (trainer:732) INFO: 46epoch:train:9201-9300batch: iter_time=1.248e-04, forward_time=0.145, loss_ctc=67.604, loss_att=57.124, acc=0.707, loss=60.268, backward_time=1.028, grad_norm=127.038, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.275e-05, train_time=2.739
+[gpub002:0/64] 2023-07-14 00:42:57,963 (trainer:732) INFO: 46epoch:train:9301-9400batch: iter_time=1.030e-04, forward_time=0.143, loss_ctc=66.251, loss_att=46.442, acc=0.724, loss=52.385, backward_time=1.027, grad_norm=112.768, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.274e-05, train_time=2.740
+[gpub002:0/64] 2023-07-14 00:45:13,954 (trainer:732) INFO: 46epoch:train:9401-9500batch: iter_time=1.018e-04, forward_time=0.144, loss_ctc=67.687, loss_att=51.450, acc=0.727, loss=56.321, backward_time=1.026, grad_norm=130.442, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.274e-05, train_time=2.718
+[gpub002:0/64] 2023-07-14 00:47:30,437 (trainer:732) INFO: 46epoch:train:9501-9600batch: iter_time=1.065e-04, forward_time=0.147, loss_ctc=66.976, loss_att=48.132, acc=0.723, loss=53.785, backward_time=1.028, grad_norm=111.136, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.273e-05, train_time=2.731
+[gpub002:0/64] 2023-07-14 00:49:46,162 (trainer:732) INFO: 46epoch:train:9601-9700batch: iter_time=1.500e-04, forward_time=0.146, loss_ctc=73.129, loss_att=56.047, acc=0.713, loss=61.172, backward_time=1.028, grad_norm=121.197, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.273e-05, train_time=2.714
+[gpub002:0/64] 2023-07-14 00:52:01,715 (trainer:732) INFO: 46epoch:train:9701-9800batch: iter_time=1.470e-04, forward_time=0.146, loss_ctc=68.415, loss_att=54.013, acc=0.702, loss=58.333, backward_time=1.027, grad_norm=120.706, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.272e-05, train_time=2.711
+[gpub002:0/64] 2023-07-14 00:54:17,220 (trainer:732) INFO: 46epoch:train:9801-9900batch: iter_time=1.497e-04, forward_time=0.146, loss_ctc=63.092, loss_att=45.005, acc=0.726, loss=50.431, backward_time=1.027, grad_norm=115.827, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.272e-05, train_time=2.710
+[gpub002:0/64] 2023-07-14 00:56:33,163 (trainer:732) INFO: 46epoch:train:9901-10000batch: iter_time=1.520e-04, forward_time=0.147, loss_ctc=71.355, loss_att=49.992, acc=0.729, loss=56.401, backward_time=1.029, grad_norm=110.394, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.271e-05, train_time=2.719
+[gpub002:0/64] 2023-07-14 01:10:58,653 (trainer:338) INFO: 46epoch results: [train] iter_time=0.292, forward_time=0.151, loss_ctc=68.954, loss_att=51.397, acc=0.719, loss=56.664, backward_time=1.032, grad_norm=125.536, clip=100.000, loss_scale=4.322e+32, optim_step_time=0.183, optim0_lr0=5.300e-05, train_time=3.565, time=4 hours, 57 minutes and 19.06 seconds, total_count=430000, gpu_max_cached_mem_GB=37.574, [valid] loss_ctc=43.169, cer_ctc=0.254, loss_att=38.108, acc=0.681, cer=0.392, wer=0.992, loss=39.627, time=8 minutes and 21.07 seconds, total_count=44022, gpu_max_cached_mem_GB=37.574, [att_plot] time=5 minutes and 49.73 seconds, total_count=0, gpu_max_cached_mem_GB=37.574
+[gpub002:0/64] 2023-07-14 01:11:14,472 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub002:0/64] 2023-07-14 01:11:14,537 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/41epoch.pth
+[gpub002:0/64] 2023-07-14 01:11:14,537 (trainer:272) INFO: 47/50epoch started. Estimated time to finish: 20 hours, 29 minutes and 24.2 seconds
+[gpub002:0/64] 2023-07-14 01:11:14,540 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub002:0/64] 2023-07-14 01:11:32,416 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 01:11:35,757 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpub002:0/64] 2023-07-14 01:11:35,757 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub002:0/64] 2023-07-14 01:11:35,764 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 01:15:48,585 (trainer:732) INFO: 47epoch:train:1-100batch: iter_time=1.312, forward_time=0.182, loss_ctc=75.825, loss_att=61.109, acc=0.689, loss=65.524, backward_time=1.041, grad_norm=158.964, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.270e-05, train_time=5.481
+[gpub002:0/64] 2023-07-14 01:18:27,141 (trainer:732) INFO: 47epoch:train:101-200batch: iter_time=8.032e-04, forward_time=0.180, loss_ctc=64.299, loss_att=48.764, acc=0.701, loss=53.424, backward_time=1.052, grad_norm=127.674, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.270e-05, train_time=3.170
+[gpub002:0/64] 2023-07-14 01:20:57,571 (trainer:732) INFO: 47epoch:train:201-300batch: iter_time=1.242e-04, forward_time=0.156, loss_ctc=68.624, loss_att=50.174, acc=0.700, loss=55.709, backward_time=1.042, grad_norm=132.411, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.269e-05, train_time=3.010
+[gpub002:0/64] 2023-07-14 01:23:21,420 (trainer:732) INFO: 47epoch:train:301-400batch: iter_time=1.294e-04, forward_time=0.147, loss_ctc=73.786, loss_att=55.907, acc=0.690, loss=61.271, backward_time=1.039, grad_norm=144.858, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.269e-05, train_time=2.876
+[gpub002:0/64] 2023-07-14 01:25:56,177 (trainer:732) INFO: 47epoch:train:401-500batch: iter_time=1.221e-04, forward_time=0.145, loss_ctc=65.105, loss_att=48.139, acc=0.706, loss=53.229, backward_time=1.057, grad_norm=144.107, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.268e-05, train_time=3.096
+[gpub002:0/64] 2023-07-14 01:28:14,232 (trainer:732) INFO: 47epoch:train:501-600batch: iter_time=1.183e-04, forward_time=0.146, loss_ctc=71.910, loss_att=51.661, acc=0.724, loss=57.735, backward_time=1.032, grad_norm=145.246, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.267e-05, train_time=2.761
+[gpub002:0/64] 2023-07-14 01:30:34,563 (trainer:732) INFO: 47epoch:train:601-700batch: iter_time=1.124e-04, forward_time=0.143, loss_ctc=66.419, loss_att=46.645, acc=0.717, loss=52.577, backward_time=1.042, grad_norm=116.967, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.267e-05, train_time=2.806
+[gpub002:0/64] 2023-07-14 01:32:55,156 (trainer:732) INFO: 47epoch:train:701-800batch: iter_time=1.111e-04, forward_time=0.144, loss_ctc=75.585, loss_att=57.869, acc=0.701, loss=63.184, backward_time=1.031, grad_norm=132.340, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.266e-05, train_time=2.812
+[gpub002:0/64] 2023-07-14 01:33:50,705 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpub002:0/64] 2023-07-14 01:34:08,334 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 01:34:11,683 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpub002:0/64] 2023-07-14 01:34:11,683 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub002:0/64] 2023-07-14 01:34:11,703 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 01:40:02,226 (trainer:732) INFO: 47epoch:train:801-900batch: iter_time=2.580, forward_time=0.174, loss_ctc=75.755, loss_att=60.417, acc=0.703, loss=65.018, backward_time=1.045, grad_norm=133.637, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.266e-05, train_time=8.541
+[gpub002:0/64] 2023-07-14 01:42:20,381 (trainer:732) INFO: 47epoch:train:901-1000batch: iter_time=1.239e-04, forward_time=0.146, loss_ctc=69.051, loss_att=52.908, acc=0.716, loss=57.751, backward_time=1.030, grad_norm=128.359, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.265e-05, train_time=2.763
+[gpub002:0/64] 2023-07-14 01:44:36,647 (trainer:732) INFO: 47epoch:train:1001-1100batch: iter_time=1.291e-04, forward_time=0.145, loss_ctc=67.260, loss_att=48.787, acc=0.713, loss=54.329, backward_time=1.033, grad_norm=133.083, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.264e-05, train_time=2.725
+[gpub002:0/64] 2023-07-14 01:46:52,658 (trainer:732) INFO: 47epoch:train:1101-1200batch: iter_time=1.208e-04, forward_time=0.146, loss_ctc=69.474, loss_att=52.446, acc=0.702, loss=57.555, backward_time=1.027, grad_norm=149.543, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.264e-05, train_time=2.720
+[gpub002:0/64] 2023-07-14 01:49:08,730 (trainer:732) INFO: 47epoch:train:1201-1300batch: iter_time=1.289e-04, forward_time=0.146, loss_ctc=63.276, loss_att=48.489, acc=0.718, loss=52.925, backward_time=1.028, grad_norm=139.425, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.263e-05, train_time=2.721
+[gpub002:0/64] 2023-07-14 01:51:24,328 (trainer:732) INFO: 47epoch:train:1301-1400batch: iter_time=1.570e-04, forward_time=0.145, loss_ctc=72.456, loss_att=49.418, acc=0.727, loss=56.329, backward_time=1.028, grad_norm=130.808, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.263e-05, train_time=2.712
+[gpub002:0/64] 2023-07-14 01:53:40,003 (trainer:732) INFO: 47epoch:train:1401-1500batch: iter_time=1.235e-04, forward_time=0.146, loss_ctc=63.452, loss_att=48.009, acc=0.722, loss=52.642, backward_time=1.027, grad_norm=121.157, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.262e-05, train_time=2.713
+[gpub002:0/64] 2023-07-14 01:55:56,022 (trainer:732) INFO: 47epoch:train:1501-1600batch: iter_time=1.239e-04, forward_time=0.146, loss_ctc=78.127, loss_att=57.946, acc=0.718, loss=64.000, backward_time=1.030, grad_norm=110.143, clip=100.000, loss_scale=5.906e+32, optim_step_time=0.183, optim0_lr0=5.262e-05, train_time=2.720
+[gpub002:0/64] 2023-07-14 01:57:27,328 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub002:0/64] 2023-07-14 01:57:45,350 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 01:57:48,763 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpub002:0/64] 2023-07-14 01:57:48,763 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub002:0/64] 2023-07-14 01:57:48,769 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 02:02:30,932 (trainer:732) INFO: 47epoch:train:1601-1700batch: iter_time=1.331, forward_time=0.182, loss_ctc=77.305, loss_att=58.961, acc=0.710, loss=64.464, backward_time=1.043, grad_norm=114.992, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.184, optim0_lr0=5.261e-05, train_time=7.898
+[gpub002:0/64] 2023-07-14 02:04:47,565 (trainer:732) INFO: 47epoch:train:1701-1800batch: iter_time=1.311e-04, forward_time=0.146, loss_ctc=68.654, loss_att=53.205, acc=0.712, loss=57.840, backward_time=1.031, grad_norm=151.268, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.260e-05, train_time=2.732
+[gpub002:0/64] 2023-07-14 02:07:03,513 (trainer:732) INFO: 47epoch:train:1801-1900batch: iter_time=1.282e-04, forward_time=0.146, loss_ctc=65.765, loss_att=46.697, acc=0.724, loss=52.417, backward_time=1.029, grad_norm=120.537, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.260e-05, train_time=2.719
+[gpub002:0/64] 2023-07-14 02:09:20,232 (trainer:732) INFO: 47epoch:train:1901-2000batch: iter_time=1.057e-04, forward_time=0.145, loss_ctc=67.723, loss_att=50.397, acc=0.716, loss=55.595, backward_time=1.030, grad_norm=131.389, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.259e-05, train_time=2.734
+[gpub002:0/64] 2023-07-14 02:11:36,110 (trainer:732) INFO: 47epoch:train:2001-2100batch: iter_time=1.112e-04, forward_time=0.145, loss_ctc=69.804, loss_att=52.306, acc=0.713, loss=57.555, backward_time=1.028, grad_norm=112.445, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.259e-05, train_time=2.717
+[gpub002:0/64] 2023-07-14 02:13:51,747 (trainer:732) INFO: 47epoch:train:2101-2200batch: iter_time=1.098e-04, forward_time=0.144, loss_ctc=69.056, loss_att=49.240, acc=0.712, loss=55.185, backward_time=1.025, grad_norm=113.889, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.258e-05, train_time=2.713
+[gpub002:0/64] 2023-07-14 02:16:07,448 (trainer:732) INFO: 47epoch:train:2201-2300batch: iter_time=1.067e-04, forward_time=0.144, loss_ctc=67.405, loss_att=49.335, acc=0.732, loss=54.756, backward_time=1.026, grad_norm=125.340, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.258e-05, train_time=2.714
+[gpub002:0/64] 2023-07-14 02:18:23,460 (trainer:732) INFO: 47epoch:train:2301-2400batch: iter_time=1.052e-04, forward_time=0.145, loss_ctc=71.576, loss_att=54.166, acc=0.723, loss=59.389, backward_time=1.028, grad_norm=130.633, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.257e-05, train_time=2.720
+[gpub002:0/64] 2023-07-14 02:20:48,284 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub002:0/64] 2023-07-14 02:21:06,460 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 02:21:09,867 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpub002:0/64] 2023-07-14 02:21:09,868 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpub002:0/64] 2023-07-14 02:21:09,874 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 02:27:11,938 (trainer:732) INFO: 47epoch:train:2401-2500batch: iter_time=1.320, forward_time=0.168, loss_ctc=72.393, loss_att=54.066, acc=0.713, loss=59.565, backward_time=1.066, grad_norm=115.635, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.256e-05, train_time=10.569
+[gpub002:0/64] 2023-07-14 02:29:29,559 (trainer:732) INFO: 47epoch:train:2501-2600batch: iter_time=1.496e-04, forward_time=0.146, loss_ctc=73.743, loss_att=59.618, acc=0.696, loss=63.855, backward_time=1.035, grad_norm=134.162, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.256e-05, train_time=2.752
+[gpub002:0/64] 2023-07-14 02:31:45,140 (trainer:732) INFO: 47epoch:train:2601-2700batch: iter_time=1.647e-04, forward_time=0.145, loss_ctc=63.864, loss_att=48.114, acc=0.703, loss=52.839, backward_time=1.028, grad_norm=120.583, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.255e-05, train_time=2.711
+[gpub002:0/64] 2023-07-14 02:34:00,714 (trainer:732) INFO: 47epoch:train:2701-2800batch: iter_time=1.460e-04, forward_time=0.146, loss_ctc=68.746, loss_att=49.931, acc=0.705, loss=55.575, backward_time=1.027, grad_norm=150.817, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.255e-05, train_time=2.711
+[gpub002:0/64] 2023-07-14 02:36:18,845 (trainer:732) INFO: 47epoch:train:2801-2900batch: iter_time=1.734e-04, forward_time=0.168, loss_ctc=70.481, loss_att=52.577, acc=0.699, loss=57.948, backward_time=1.029, grad_norm=134.419, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.184, optim0_lr0=5.254e-05, train_time=2.762
+[gpub002:0/64] 2023-07-14 02:38:42,459 (trainer:732) INFO: 47epoch:train:2901-3000batch: iter_time=1.060e-04, forward_time=0.144, loss_ctc=64.475, loss_att=47.169, acc=0.714, loss=52.361, backward_time=1.057, grad_norm=124.697, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.253e-05, train_time=2.872
+[gpub002:0/64] 2023-07-14 02:41:05,117 (trainer:732) INFO: 47epoch:train:3001-3100batch: iter_time=3.092e-04, forward_time=0.157, loss_ctc=72.537, loss_att=51.195, acc=0.726, loss=57.597, backward_time=1.039, grad_norm=137.411, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.253e-05, train_time=2.853
+[gpub002:0/64] 2023-07-14 02:43:46,956 (trainer:732) INFO: 47epoch:train:3101-3200batch: iter_time=0.002, forward_time=0.194, loss_ctc=64.519, loss_att=46.475, acc=0.719, loss=51.888, backward_time=1.090, grad_norm=114.179, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.200, optim0_lr0=5.252e-05, train_time=3.236
+[gpub002:0/64] 2023-07-14 02:44:01,707 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub002:0/64] 2023-07-14 02:46:11,080 (trainer:732) INFO: 47epoch:train:3201-3300batch: iter_time=1.368e-04, forward_time=0.146, loss_ctc=75.702, loss_att=56.674, acc=0.706, loss=62.383, backward_time=1.046, grad_norm=152.557, clip=100.000, loss_scale=3.510e+32, optim_step_time=0.183, optim0_lr0=5.252e-05, train_time=2.883
+[gpub002:0/64] 2023-07-14 02:47:14,725 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub002:0/64] 2023-07-14 02:47:32,956 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 02:47:36,422 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpub002:0/64] 2023-07-14 02:47:36,422 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub002:0/64] 2023-07-14 02:47:36,472 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 02:54:55,647 (trainer:732) INFO: 47epoch:train:3301-3400batch: iter_time=3.444, forward_time=0.146, loss_ctc=70.929, loss_att=56.270, acc=0.703, loss=60.668, backward_time=1.052, grad_norm=114.309, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.251e-05, train_time=10.491
+[gpub002:0/64] 2023-07-14 02:57:12,121 (trainer:732) INFO: 47epoch:train:3401-3500batch: iter_time=1.342e-04, forward_time=0.145, loss_ctc=63.963, loss_att=47.556, acc=0.723, loss=52.478, backward_time=1.031, grad_norm=101.174, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.251e-05, train_time=2.729
+[gpub002:0/64] 2023-07-14 02:59:28,257 (trainer:732) INFO: 47epoch:train:3501-3600batch: iter_time=1.333e-04, forward_time=0.147, loss_ctc=66.806, loss_att=50.043, acc=0.710, loss=55.072, backward_time=1.029, grad_norm=138.787, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.250e-05, train_time=2.722
+[gpub002:0/64] 2023-07-14 03:01:48,809 (trainer:732) INFO: 47epoch:train:3601-3700batch: iter_time=1.177e-04, forward_time=0.146, loss_ctc=71.928, loss_att=54.464, acc=0.712, loss=59.703, backward_time=1.036, grad_norm=200.177, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.249e-05, train_time=2.811
+[gpub002:0/64] 2023-07-14 03:04:07,361 (trainer:732) INFO: 47epoch:train:3701-3800batch: iter_time=1.198e-04, forward_time=0.147, loss_ctc=67.152, loss_att=46.952, acc=0.720, loss=53.012, backward_time=1.030, grad_norm=148.586, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.249e-05, train_time=2.771
+[gpub002:0/64] 2023-07-14 03:06:28,698 (trainer:732) INFO: 47epoch:train:3801-3900batch: iter_time=1.379e-04, forward_time=0.145, loss_ctc=65.566, loss_att=47.542, acc=0.728, loss=52.949, backward_time=1.052, grad_norm=109.596, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.248e-05, train_time=2.827
+[gpub002:0/64] 2023-07-14 03:08:45,304 (trainer:732) INFO: 47epoch:train:3901-4000batch: iter_time=1.467e-04, forward_time=0.145, loss_ctc=70.262, loss_att=51.105, acc=0.728, loss=56.852, backward_time=1.027, grad_norm=130.886, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.248e-05, train_time=2.732
+[gpub002:0/64] 2023-07-14 03:11:01,331 (trainer:732) INFO: 47epoch:train:4001-4100batch: iter_time=1.377e-04, forward_time=0.146, loss_ctc=68.748, loss_att=49.916, acc=0.725, loss=55.566, backward_time=1.029, grad_norm=137.121, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.247e-05, train_time=2.720
+[gpub002:0/64] 2023-07-14 03:12:38,470 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub002:0/64] 2023-07-14 03:12:56,794 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 03:13:00,241 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpub002:0/64] 2023-07-14 03:13:00,241 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub002:0/64] 2023-07-14 03:13:00,247 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 03:19:05,776 (trainer:732) INFO: 47epoch:train:4101-4200batch: iter_time=1.352, forward_time=0.208, loss_ctc=74.165, loss_att=58.149, acc=0.700, loss=62.954, backward_time=1.172, grad_norm=121.785, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.187, optim0_lr0=5.247e-05, train_time=9.688
+[gpub002:0/64] 2023-07-14 03:21:21,928 (trainer:732) INFO: 47epoch:train:4201-4300batch: iter_time=1.398e-04, forward_time=0.145, loss_ctc=65.068, loss_att=49.135, acc=0.702, loss=53.915, backward_time=1.028, grad_norm=131.373, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.246e-05, train_time=2.723
+[gpub002:0/64] 2023-07-14 03:23:38,012 (trainer:732) INFO: 47epoch:train:4301-4400batch: iter_time=1.409e-04, forward_time=0.144, loss_ctc=67.892, loss_att=51.883, acc=0.698, loss=56.686, backward_time=1.029, grad_norm=124.443, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.245e-05, train_time=2.721
+[gpub002:0/64] 2023-07-14 03:25:53,885 (trainer:732) INFO: 47epoch:train:4401-4500batch: iter_time=1.313e-04, forward_time=0.144, loss_ctc=68.165, loss_att=48.760, acc=0.707, loss=54.582, backward_time=1.028, grad_norm=148.361, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.245e-05, train_time=2.717
+[gpub002:0/64] 2023-07-14 03:28:09,352 (trainer:732) INFO: 47epoch:train:4501-4600batch: iter_time=1.304e-04, forward_time=0.144, loss_ctc=62.490, loss_att=47.304, acc=0.713, loss=51.860, backward_time=1.026, grad_norm=114.356, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.244e-05, train_time=2.709
+[gpub002:0/64] 2023-07-14 03:30:25,107 (trainer:732) INFO: 47epoch:train:4601-4700batch: iter_time=1.325e-04, forward_time=0.144, loss_ctc=73.055, loss_att=52.955, acc=0.722, loss=58.985, backward_time=1.027, grad_norm=135.483, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.244e-05, train_time=2.715
+[gpub002:0/64] 2023-07-14 03:32:40,456 (trainer:732) INFO: 47epoch:train:4701-4800batch: iter_time=1.270e-04, forward_time=0.143, loss_ctc=64.160, loss_att=45.152, acc=0.720, loss=50.854, backward_time=1.025, grad_norm=152.783, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.243e-05, train_time=2.707
+[gpub002:0/64] 2023-07-14 03:34:55,883 (trainer:732) INFO: 47epoch:train:4801-4900batch: iter_time=1.281e-04, forward_time=0.144, loss_ctc=75.470, loss_att=57.125, acc=0.710, loss=62.628, backward_time=1.026, grad_norm=128.779, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.242e-05, train_time=2.708
+[gpub002:0/64] 2023-07-14 03:37:11,333 (trainer:732) INFO: 47epoch:train:4901-5000batch: iter_time=1.184e-04, forward_time=0.145, loss_ctc=68.637, loss_att=53.134, acc=0.699, loss=57.785, backward_time=1.027, grad_norm=135.549, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.242e-05, train_time=2.709
+[gpub002:0/64] 2023-07-14 03:37:13,747 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub002:0/64] 2023-07-14 03:37:32,350 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 03:37:35,790 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpub002:0/64] 2023-07-14 03:37:35,790 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub002:0/64] 2023-07-14 03:37:35,797 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 03:45:17,483 (trainer:732) INFO: 47epoch:train:5001-5100batch: iter_time=1.349, forward_time=0.238, loss_ctc=74.372, loss_att=58.913, acc=0.696, loss=63.550, backward_time=1.050, grad_norm=124.226, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.187, optim0_lr0=5.241e-05, train_time=9.723
+[gpub002:0/64] 2023-07-14 03:47:33,846 (trainer:732) INFO: 47epoch:train:5101-5200batch: iter_time=1.250e-04, forward_time=0.145, loss_ctc=63.535, loss_att=47.253, acc=0.709, loss=52.138, backward_time=1.028, grad_norm=146.127, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.241e-05, train_time=2.727
+[gpub002:0/64] 2023-07-14 03:49:56,309 (trainer:732) INFO: 47epoch:train:5201-5300batch: iter_time=1.404e-04, forward_time=0.145, loss_ctc=67.170, loss_att=48.352, acc=0.708, loss=53.997, backward_time=1.032, grad_norm=104.227, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.240e-05, train_time=2.849
+[gpub002:0/64] 2023-07-14 03:52:23,539 (trainer:732) INFO: 47epoch:train:5301-5400batch: iter_time=1.378e-04, forward_time=0.147, loss_ctc=70.328, loss_att=52.677, acc=0.704, loss=57.973, backward_time=1.041, grad_norm=125.767, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.240e-05, train_time=2.944
+[gpub002:0/64] 2023-07-14 03:54:39,620 (trainer:732) INFO: 47epoch:train:5401-5500batch: iter_time=1.212e-04, forward_time=0.145, loss_ctc=63.753, loss_att=46.245, acc=0.719, loss=51.498, backward_time=1.030, grad_norm=101.661, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.239e-05, train_time=2.721
+[gpub002:0/64] 2023-07-14 03:56:55,460 (trainer:732) INFO: 47epoch:train:5501-5600batch: iter_time=1.463e-04, forward_time=0.145, loss_ctc=70.240, loss_att=51.409, acc=0.725, loss=57.058, backward_time=1.029, grad_norm=153.144, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.238e-05, train_time=2.717
+[gpub002:0/64] 2023-07-14 03:59:20,845 (trainer:732) INFO: 47epoch:train:5601-5700batch: iter_time=7.024e-04, forward_time=0.186, loss_ctc=63.760, loss_att=44.689, acc=0.728, loss=50.410, backward_time=1.053, grad_norm=104.609, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.238e-05, train_time=2.907
+[gpub002:0/64] 2023-07-14 04:01:56,667 (trainer:732) INFO: 47epoch:train:5701-5800batch: iter_time=1.300e-04, forward_time=0.145, loss_ctc=75.062, loss_att=56.213, acc=0.712, loss=61.868, backward_time=1.096, grad_norm=113.891, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.237e-05, train_time=3.117
+[gpub002:0/64] 2023-07-14 04:03:00,524 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub002:0/64] 2023-07-14 04:03:18,672 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 04:03:22,119 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpub002:0/64] 2023-07-14 04:03:22,120 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub002:0/64] 2023-07-14 04:03:22,126 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 04:08:19,987 (trainer:732) INFO: 47epoch:train:5801-5900batch: iter_time=2.282, forward_time=0.213, loss_ctc=75.092, loss_att=60.659, acc=0.705, loss=64.989, backward_time=1.059, grad_norm=119.456, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.237e-05, train_time=7.666
+[gpub002:0/64] 2023-07-14 04:10:36,391 (trainer:732) INFO: 47epoch:train:5901-6000batch: iter_time=1.312e-04, forward_time=0.146, loss_ctc=66.678, loss_att=50.374, acc=0.726, loss=55.266, backward_time=1.030, grad_norm=136.916, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.236e-05, train_time=2.728
+[gpub002:0/64] 2023-07-14 04:12:52,162 (trainer:732) INFO: 47epoch:train:6001-6100batch: iter_time=1.254e-04, forward_time=0.145, loss_ctc=65.131, loss_att=47.955, acc=0.721, loss=53.108, backward_time=1.026, grad_norm=129.089, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.236e-05, train_time=2.715
+[gpub002:0/64] 2023-07-14 04:15:07,834 (trainer:732) INFO: 47epoch:train:6101-6200batch: iter_time=1.218e-04, forward_time=0.145, loss_ctc=67.860, loss_att=50.576, acc=0.714, loss=55.761, backward_time=1.026, grad_norm=109.883, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.235e-05, train_time=2.713
+[gpub002:0/64] 2023-07-14 04:17:23,387 (trainer:732) INFO: 47epoch:train:6201-6300batch: iter_time=1.233e-04, forward_time=0.144, loss_ctc=61.925, loss_att=47.562, acc=0.718, loss=51.871, backward_time=1.027, grad_norm=111.806, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.234e-05, train_time=2.711
+[gpub002:0/64] 2023-07-14 04:19:46,670 (trainer:732) INFO: 47epoch:train:6301-6400batch: iter_time=0.005, forward_time=0.187, loss_ctc=70.970, loss_att=49.802, acc=0.728, loss=56.152, backward_time=1.043, grad_norm=134.690, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.188, optim0_lr0=5.234e-05, train_time=2.865
+[gpub002:0/64] 2023-07-14 04:22:02,533 (trainer:732) INFO: 47epoch:train:6401-6500batch: iter_time=1.219e-04, forward_time=0.145, loss_ctc=63.773, loss_att=48.033, acc=0.726, loss=52.755, backward_time=1.025, grad_norm=110.767, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.233e-05, train_time=2.718
+[gpub002:0/64] 2023-07-14 04:24:18,474 (trainer:732) INFO: 47epoch:train:6501-6600batch: iter_time=1.290e-04, forward_time=0.145, loss_ctc=76.393, loss_att=57.916, acc=0.722, loss=63.459, backward_time=1.028, grad_norm=126.533, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.233e-05, train_time=2.719
+[gpub002:0/64] 2023-07-14 04:25:50,082 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub002:0/64] 2023-07-14 04:26:08,645 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 04:26:12,059 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+ preprocess: )
+[gpub002:0/64] 2023-07-14 04:26:12,060 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpub002:0/64] 2023-07-14 04:26:12,066 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 04:30:38,669 (trainer:732) INFO: 47epoch:train:6601-6700batch: iter_time=1.313, forward_time=0.145, loss_ctc=74.407, loss_att=56.874, acc=0.714, loss=62.134, backward_time=1.044, grad_norm=148.576, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.232e-05, train_time=7.604
+[gpub002:0/64] 2023-07-14 04:32:55,250 (trainer:732) INFO: 47epoch:train:6701-6800batch: iter_time=1.179e-04, forward_time=0.145, loss_ctc=66.947, loss_att=51.659, acc=0.708, loss=56.246, backward_time=1.029, grad_norm=104.621, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.232e-05, train_time=2.731
+[gpub002:0/64] 2023-07-14 04:35:11,755 (trainer:732) INFO: 47epoch:train:6801-6900batch: iter_time=1.132e-04, forward_time=0.145, loss_ctc=64.313, loss_att=47.002, acc=0.716, loss=52.196, backward_time=1.028, grad_norm=98.831, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.231e-05, train_time=2.730
+[gpub002:0/64] 2023-07-14 04:37:28,932 (trainer:732) INFO: 47epoch:train:6901-7000batch: iter_time=3.968e-04, forward_time=0.152, loss_ctc=66.847, loss_att=51.004, acc=0.707, loss=55.757, backward_time=1.032, grad_norm=99.645, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.230e-05, train_time=2.743
+[gpub002:0/64] 2023-07-14 04:39:53,468 (trainer:732) INFO: 47epoch:train:7001-7100batch: iter_time=1.192e-04, forward_time=0.143, loss_ctc=67.687, loss_att=49.843, acc=0.716, loss=55.196, backward_time=1.040, grad_norm=119.497, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.230e-05, train_time=2.891
+[gpub002:0/64] 2023-07-14 04:42:25,260 (trainer:732) INFO: 47epoch:train:7101-7200batch: iter_time=1.263e-04, forward_time=0.146, loss_ctc=68.017, loss_att=47.851, acc=0.713, loss=53.900, backward_time=1.067, grad_norm=115.656, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.229e-05, train_time=3.036
+[gpub002:0/64] 2023-07-14 04:44:41,100 (trainer:732) INFO: 47epoch:train:7201-7300batch: iter_time=1.187e-04, forward_time=0.146, loss_ctc=66.541, loss_att=47.903, acc=0.728, loss=53.495, backward_time=1.029, grad_norm=118.808, clip=100.000, loss_scale=6.166e+32, optim_step_time=0.183, optim0_lr0=5.229e-05, train_time=2.717
+[gpub002:0/64] 2023-07-14 04:46:57,499 (trainer:732) INFO: 47epoch:train:7301-7400batch: iter_time=1.337e-04, forward_time=0.146, loss_ctc=71.183, loss_att=51.940, acc=0.725, loss=57.713, backward_time=1.029, grad_norm=115.971, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.228e-05, train_time=2.728
+[gpub002:0/64] 2023-07-14 04:49:48,888 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub002:0/64] 2023-07-14 04:50:07,256 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 04:50:10,689 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpub002:0/64] 2023-07-14 04:50:10,689 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpub002:0/64] 2023-07-14 04:50:10,710 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 04:54:02,831 (trainer:732) INFO: 47epoch:train:7401-7500batch: iter_time=2.585, forward_time=0.260, loss_ctc=71.936, loss_att=53.052, acc=0.710, loss=58.717, backward_time=1.061, grad_norm=137.288, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.188, optim0_lr0=5.228e-05, train_time=8.505
+[gpub002:0/64] 2023-07-14 04:56:20,415 (trainer:732) INFO: 47epoch:train:7501-7600batch: iter_time=1.183e-04, forward_time=0.146, loss_ctc=68.399, loss_att=53.056, acc=0.694, loss=57.658, backward_time=1.034, grad_norm=124.323, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.227e-05, train_time=2.753
+[gpub002:0/64] 2023-07-14 04:58:36,815 (trainer:732) INFO: 47epoch:train:7601-7700batch: iter_time=1.276e-04, forward_time=0.144, loss_ctc=67.099, loss_att=48.016, acc=0.713, loss=53.741, backward_time=1.027, grad_norm=111.682, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.226e-05, train_time=2.728
+[gpub002:0/64] 2023-07-14 05:00:53,216 (trainer:732) INFO: 47epoch:train:7701-7800batch: iter_time=1.241e-04, forward_time=0.144, loss_ctc=67.867, loss_att=52.326, acc=0.704, loss=56.988, backward_time=1.029, grad_norm=134.364, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.183, optim0_lr0=5.226e-05, train_time=2.728
+[gpub002:0/64] 2023-07-14 05:01:20,178 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub002:0/64] 2023-07-14 05:03:08,612 (trainer:732) INFO: 47epoch:train:7801-7900batch: iter_time=1.185e-04, forward_time=0.144, loss_ctc=63.118, loss_att=45.662, acc=0.720, loss=50.899, backward_time=1.027, grad_norm=101.710, clip=100.000, loss_scale=3.841e+32, optim_step_time=0.182, optim0_lr0=5.225e-05, train_time=2.708
+[gpub002:0/64] 2023-07-14 05:05:24,239 (trainer:732) INFO: 47epoch:train:7901-8000batch: iter_time=1.352e-04, forward_time=0.145, loss_ctc=70.809, loss_att=49.978, acc=0.711, loss=56.227, backward_time=1.028, grad_norm=119.098, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.225e-05, train_time=2.712
+[gpub002:0/64] 2023-07-14 05:07:39,760 (trainer:732) INFO: 47epoch:train:8001-8100batch: iter_time=1.291e-04, forward_time=0.145, loss_ctc=65.678, loss_att=48.315, acc=0.720, loss=53.524, backward_time=1.028, grad_norm=121.079, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.224e-05, train_time=2.710
+[gpub002:0/64] 2023-07-14 05:09:55,413 (trainer:732) INFO: 47epoch:train:8101-8200batch: iter_time=1.275e-04, forward_time=0.146, loss_ctc=72.102, loss_att=52.782, acc=0.728, loss=58.578, backward_time=1.027, grad_norm=124.500, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.224e-05, train_time=2.713
+[gpub002:0/64] 2023-07-14 05:12:11,383 (trainer:732) INFO: 47epoch:train:8201-8300batch: iter_time=1.328e-04, forward_time=0.146, loss_ctc=70.788, loss_att=52.774, acc=0.710, loss=58.178, backward_time=1.031, grad_norm=115.453, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.223e-05, train_time=2.719
+[gpub002:0/64] 2023-07-14 05:12:57,530 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub002:0/64] 2023-07-14 05:13:15,615 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 05:13:19,325 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpub002:0/64] 2023-07-14 05:13:19,325 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpub002:0/64] 2023-07-14 05:13:19,331 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 05:19:34,885 (trainer:732) INFO: 47epoch:train:8301-8400batch: iter_time=1.320, forward_time=0.157, loss_ctc=69.342, loss_att=56.350, acc=0.696, loss=60.247, backward_time=1.040, grad_norm=126.658, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.222e-05, train_time=8.870
+[gpub002:0/64] 2023-07-14 05:21:52,367 (trainer:732) INFO: 47epoch:train:8401-8500batch: iter_time=1.110e-04, forward_time=0.145, loss_ctc=63.969, loss_att=47.931, acc=0.724, loss=52.742, backward_time=1.031, grad_norm=107.000, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.222e-05, train_time=2.749
+[gpub002:0/64] 2023-07-14 05:24:08,298 (trainer:732) INFO: 47epoch:train:8501-8600batch: iter_time=1.100e-04, forward_time=0.144, loss_ctc=65.267, loss_att=48.804, acc=0.713, loss=53.743, backward_time=1.027, grad_norm=121.119, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.221e-05, train_time=2.718
+[gpub002:0/64] 2023-07-14 05:26:24,844 (trainer:732) INFO: 47epoch:train:8601-8700batch: iter_time=1.158e-04, forward_time=0.144, loss_ctc=72.559, loss_att=55.089, acc=0.712, loss=60.330, backward_time=1.029, grad_norm=122.276, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.221e-05, train_time=2.731
+[gpub002:0/64] 2023-07-14 05:28:40,559 (trainer:732) INFO: 47epoch:train:8701-8800batch: iter_time=1.175e-04, forward_time=0.145, loss_ctc=65.539, loss_att=47.000, acc=0.719, loss=52.562, backward_time=1.027, grad_norm=122.552, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.220e-05, train_time=2.714
+[gpub002:0/64] 2023-07-14 05:30:56,463 (trainer:732) INFO: 47epoch:train:8801-8900batch: iter_time=1.200e-04, forward_time=0.145, loss_ctc=66.154, loss_att=47.927, acc=0.733, loss=53.395, backward_time=1.029, grad_norm=129.162, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.220e-05, train_time=2.718
+[gpub002:0/64] 2023-07-14 05:33:12,008 (trainer:732) INFO: 47epoch:train:8901-9000batch: iter_time=1.259e-04, forward_time=0.145, loss_ctc=71.851, loss_att=52.837, acc=0.726, loss=58.541, backward_time=1.027, grad_norm=117.162, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.219e-05, train_time=2.711
+[gpub002:0/64] 2023-07-14 05:35:27,593 (trainer:732) INFO: 47epoch:train:9001-9100batch: iter_time=1.289e-04, forward_time=0.146, loss_ctc=69.806, loss_att=51.495, acc=0.721, loss=56.989, backward_time=1.027, grad_norm=118.907, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.218e-05, train_time=2.711
+[gpub002:0/64] 2023-07-14 05:37:02,534 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub002:0/64] 2023-07-14 05:37:20,733 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 05:37:24,243 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpub002:0/64] 2023-07-14 05:37:24,243 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub002:0/64] 2023-07-14 05:37:24,250 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 05:41:19,308 (trainer:732) INFO: 47epoch:train:9101-9200batch: iter_time=1.370, forward_time=0.154, loss_ctc=73.500, loss_att=59.056, acc=0.706, loss=63.389, backward_time=1.043, grad_norm=129.829, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.218e-05, train_time=7.034
+[gpub002:0/64] 2023-07-14 05:43:35,917 (trainer:732) INFO: 47epoch:train:9201-9300batch: iter_time=1.274e-04, forward_time=0.145, loss_ctc=66.338, loss_att=51.966, acc=0.717, loss=56.278, backward_time=1.031, grad_norm=120.789, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.217e-05, train_time=2.732
+[gpub002:0/64] 2023-07-14 05:45:52,711 (trainer:732) INFO: 47epoch:train:9301-9400batch: iter_time=1.314e-04, forward_time=0.146, loss_ctc=64.912, loss_att=46.474, acc=0.726, loss=52.005, backward_time=1.029, grad_norm=133.594, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.217e-05, train_time=2.736
+[gpub002:0/64] 2023-07-14 05:48:13,955 (trainer:732) INFO: 47epoch:train:9401-9500batch: iter_time=1.413e-04, forward_time=0.191, loss_ctc=67.262, loss_att=50.534, acc=0.717, loss=55.552, backward_time=1.033, grad_norm=131.021, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.186, optim0_lr0=5.216e-05, train_time=2.825
+[gpub002:0/64] 2023-07-14 05:50:43,138 (trainer:732) INFO: 47epoch:train:9501-9600batch: iter_time=0.001, forward_time=0.230, loss_ctc=68.713, loss_att=50.464, acc=0.725, loss=55.939, backward_time=1.041, grad_norm=113.311, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.200, optim0_lr0=5.216e-05, train_time=2.981
+[gpub002:0/64] 2023-07-14 05:53:04,340 (trainer:732) INFO: 47epoch:train:9601-9700batch: iter_time=1.376e-04, forward_time=0.146, loss_ctc=67.075, loss_att=47.370, acc=0.719, loss=53.282, backward_time=1.032, grad_norm=122.282, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.215e-05, train_time=2.826
+[gpub002:0/64] 2023-07-14 05:55:20,063 (trainer:732) INFO: 47epoch:train:9701-9800batch: iter_time=1.301e-04, forward_time=0.145, loss_ctc=67.510, loss_att=48.979, acc=0.734, loss=54.538, backward_time=1.028, grad_norm=124.005, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.214e-05, train_time=2.714
+[gpub002:0/64] 2023-07-14 05:57:35,588 (trainer:732) INFO: 47epoch:train:9801-9900batch: iter_time=1.115e-04, forward_time=0.143, loss_ctc=69.885, loss_att=51.819, acc=0.728, loss=57.239, backward_time=1.028, grad_norm=112.202, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.214e-05, train_time=2.710
+[gpub002:0/64] 2023-07-14 05:59:51,428 (trainer:732) INFO: 47epoch:train:9901-10000batch: iter_time=9.828e-05, forward_time=0.144, loss_ctc=71.596, loss_att=53.648, acc=0.720, loss=59.033, backward_time=1.029, grad_norm=136.522, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.213e-05, train_time=2.717
+[gpub002:0/64] 2023-07-14 06:14:05,194 (trainer:338) INFO: 47epoch results: [train] iter_time=0.216, forward_time=0.153, loss_ctc=68.897, loss_att=51.327, acc=0.714, loss=56.598, backward_time=1.037, grad_norm=126.595, clip=100.000, loss_scale=3.991e+32, optim_step_time=0.183, optim0_lr0=5.242e-05, train_time=3.463, time=4 hours, 48 minutes and 59.64 seconds, total_count=440000, gpu_max_cached_mem_GB=37.574, [valid] loss_ctc=43.333, cer_ctc=0.254, loss_att=36.942, acc=0.674, cer=0.429, wer=0.998, loss=38.859, time=7 minutes and 55.45 seconds, total_count=45034, gpu_max_cached_mem_GB=37.574, [att_plot] time=5 minutes and 55.55 seconds, total_count=0, gpu_max_cached_mem_GB=37.574
+[gpub002:0/64] 2023-07-14 06:14:21,862 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub002:0/64] 2023-07-14 06:14:21,871 (trainer:272) INFO: 48/50epoch started. Estimated time to finish: 15 hours, 20 minutes and 28.01 seconds
+[gpub002:0/64] 2023-07-14 06:14:22,214 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub002:0/64] 2023-07-14 06:14:41,401 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 06:14:44,825 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpub002:0/64] 2023-07-14 06:14:44,825 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpub002:0/64] 2023-07-14 06:14:44,832 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 06:25:13,027 (trainer:732) INFO: 48epoch:train:1-100batch: iter_time=5.066, forward_time=0.185, loss_ctc=72.566, loss_att=50.721, acc=0.708, loss=57.275, backward_time=1.043, grad_norm=138.002, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.198, optim0_lr0=5.213e-05, train_time=13.017
+[gpub002:0/64] 2023-07-14 06:27:29,352 (trainer:732) INFO: 48epoch:train:101-200batch: iter_time=1.335e-04, forward_time=0.145, loss_ctc=77.852, loss_att=63.076, acc=0.696, loss=67.509, backward_time=1.031, grad_norm=128.219, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.212e-05, train_time=2.726
+[gpub002:0/64] 2023-07-14 06:29:52,948 (trainer:732) INFO: 48epoch:train:201-300batch: iter_time=1.227e-04, forward_time=0.144, loss_ctc=74.900, loss_att=50.312, acc=0.726, loss=57.688, backward_time=1.027, grad_norm=135.033, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.212e-05, train_time=2.872
+[gpub002:0/64] 2023-07-14 06:32:08,576 (trainer:732) INFO: 48epoch:train:301-400batch: iter_time=1.240e-04, forward_time=0.144, loss_ctc=76.914, loss_att=54.854, acc=0.698, loss=61.472, backward_time=1.026, grad_norm=140.589, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.211e-05, train_time=2.712
+[gpub002:0/64] 2023-07-14 06:34:24,747 (trainer:732) INFO: 48epoch:train:401-500batch: iter_time=1.252e-04, forward_time=0.143, loss_ctc=64.489, loss_att=50.286, acc=0.704, loss=54.546, backward_time=1.025, grad_norm=120.535, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.210e-05, train_time=2.723
+[gpub002:0/64] 2023-07-14 06:36:44,732 (trainer:732) INFO: 48epoch:train:501-600batch: iter_time=1.248e-04, forward_time=0.145, loss_ctc=72.098, loss_att=53.893, acc=0.695, loss=59.355, backward_time=1.026, grad_norm=121.753, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.210e-05, train_time=2.799
+[gpub002:0/64] 2023-07-14 06:39:10,733 (trainer:732) INFO: 48epoch:train:601-700batch: iter_time=0.004, forward_time=0.187, loss_ctc=68.179, loss_att=50.186, acc=0.717, loss=55.584, backward_time=1.038, grad_norm=119.134, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.209e-05, train_time=2.919
+[gpub002:0/64] 2023-07-14 06:41:44,082 (trainer:732) INFO: 48epoch:train:701-800batch: iter_time=1.232e-04, forward_time=0.239, loss_ctc=72.896, loss_att=50.389, acc=0.701, loss=57.141, backward_time=1.045, grad_norm=117.642, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.188, optim0_lr0=5.209e-05, train_time=3.068
+[gpub002:0/64] 2023-07-14 06:42:40,091 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpub002:0/64] 2023-07-14 06:42:57,858 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 06:43:01,178 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpub002:0/64] 2023-07-14 06:43:01,178 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub002:0/64] 2023-07-14 06:43:01,184 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 06:47:36,689 (trainer:732) INFO: 48epoch:train:801-900batch: iter_time=1.777, forward_time=0.193, loss_ctc=75.968, loss_att=53.040, acc=0.714, loss=59.919, backward_time=1.043, grad_norm=123.686, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.186, optim0_lr0=5.208e-05, train_time=7.051
+[gpub002:0/64] 2023-07-14 06:50:07,555 (trainer:732) INFO: 48epoch:train:901-1000batch: iter_time=1.021e-04, forward_time=0.146, loss_ctc=80.428, loss_att=59.359, acc=0.718, loss=65.679, backward_time=1.046, grad_norm=134.626, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.208e-05, train_time=3.018
+[gpub002:0/64] 2023-07-14 06:52:23,798 (trainer:732) INFO: 48epoch:train:1001-1100batch: iter_time=1.193e-04, forward_time=0.144, loss_ctc=74.892, loss_att=50.678, acc=0.724, loss=57.942, backward_time=1.031, grad_norm=128.411, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.207e-05, train_time=2.725
+[gpub002:0/64] 2023-07-14 06:54:40,151 (trainer:732) INFO: 48epoch:train:1101-1200batch: iter_time=1.110e-04, forward_time=0.143, loss_ctc=74.451, loss_att=54.112, acc=0.722, loss=60.214, backward_time=1.030, grad_norm=147.532, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.207e-05, train_time=2.727
+[gpub002:0/64] 2023-07-14 06:56:57,546 (trainer:732) INFO: 48epoch:train:1201-1300batch: iter_time=1.023e-04, forward_time=0.144, loss_ctc=63.785, loss_att=48.558, acc=0.713, loss=53.126, backward_time=1.031, grad_norm=111.642, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.206e-05, train_time=2.748
+[gpub002:0/64] 2023-07-14 06:59:13,500 (trainer:732) INFO: 48epoch:train:1301-1400batch: iter_time=1.072e-04, forward_time=0.144, loss_ctc=72.696, loss_att=53.103, acc=0.718, loss=58.981, backward_time=1.030, grad_norm=130.057, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.205e-05, train_time=2.719
+[gpub002:0/64] 2023-07-14 07:01:29,121 (trainer:732) INFO: 48epoch:train:1401-1500batch: iter_time=1.109e-04, forward_time=0.143, loss_ctc=68.627, loss_att=48.296, acc=0.723, loss=54.395, backward_time=1.027, grad_norm=116.485, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.205e-05, train_time=2.712
+[gpub002:0/64] 2023-07-14 07:03:53,791 (trainer:732) INFO: 48epoch:train:1501-1600batch: iter_time=8.219e-04, forward_time=0.204, loss_ctc=67.797, loss_att=49.570, acc=0.714, loss=55.038, backward_time=1.040, grad_norm=129.227, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.196, optim0_lr0=5.204e-05, train_time=2.893
+[gpub002:0/64] 2023-07-14 07:05:50,238 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub002:0/64] 2023-07-14 07:06:08,419 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub002:0/64] 2023-07-14 07:06:11,812 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpub002:0/64] 2023-07-14 07:06:11,812 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub002:0/64] 2023-07-14 07:06:11,818 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub002:0/64] 2023-07-14 07:11:36,453 (trainer:732) INFO: 48epoch:train:1601-1700batch: iter_time=3.165, forward_time=0.201, loss_ctc=77.679, loss_att=56.373, acc=0.700, loss=62.765, backward_time=1.041, grad_norm=134.671, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.204e-05, train_time=9.252
+[gpub002:0/64] 2023-07-14 07:13:53,755 (trainer:732) INFO: 48epoch:train:1701-1800batch: iter_time=1.167e-04, forward_time=0.147, loss_ctc=67.651, loss_att=49.981, acc=0.716, loss=55.282, backward_time=1.033, grad_norm=139.985, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.203e-05, train_time=2.747
+[gpub002:0/64] 2023-07-14 07:16:10,215 (trainer:732) INFO: 48epoch:train:1801-1900batch: iter_time=1.174e-04, forward_time=0.146, loss_ctc=81.909, loss_att=60.376, acc=0.710, loss=66.836, backward_time=1.031, grad_norm=150.019, clip=100.000, loss_scale=5.841e+32, optim_step_time=0.182, optim0_lr0=5.203e-05, train_time=2.729
+[gpub002:0/64] 2023-07-14 07:18:26,045 (trainer:732) INFO: 48epoch:train:1901-2000batch: iter_time=9.725e-05, forward_time=0.145, loss_ctc=72.694, loss_att=50.879, acc=0.730, loss=57.424, backward_time=1.029, grad_norm=100.349, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.202e-05, train_time=2.716
+[gpub002:0/64] 2023-07-14 07:20:41,800 (trainer:732) INFO: 48epoch:train:2001-2100batch: iter_time=9.471e-05, forward_time=0.144, loss_ctc=75.595, loss_att=54.872, acc=0.713, loss=61.089, backward_time=1.028, grad_norm=135.140, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.201e-05, train_time=2.715
+[gpub002:0/64] 2023-07-14 07:22:57,479 (trainer:732) INFO: 48epoch:train:2101-2200batch: iter_time=8.940e-05, forward_time=0.144, loss_ctc=63.218, loss_att=46.176, acc=0.721, loss=51.288, backward_time=1.029, grad_norm=166.606, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.201e-05, train_time=2.713
+[gpub002:0/64] 2023-07-14 07:25:13,488 (trainer:732) INFO: 48epoch:train:2201-2300batch: iter_time=9.836e-05, forward_time=0.146, loss_ctc=70.710, loss_att=53.282, acc=0.717, loss=58.510, backward_time=1.031, grad_norm=133.467, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.200e-05, train_time=2.720
+[gpub002:0/64] 2023-07-14 07:27:29,318 (trainer:732) INFO: 48epoch:train:2301-2400batch: iter_time=9.628e-05, forward_time=0.144, loss_ctc=69.445, loss_att=49.286, acc=0.727, loss=55.334, backward_time=1.029, grad_norm=133.909, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.200e-05, train_time=2.716
+[gpub002:0/64] 2023-07-14 07:28:58,595 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model.
+[gpub002:0/64] 2023-07-14 07:29:44,718 (trainer:732) INFO: 48epoch:train:2401-2500batch: iter_time=9.391e-05, forward_time=0.144, loss_ctc=69.277, loss_att=52.038, acc=0.706, loss=57.209, backward_time=1.028, grad_norm=129.891, clip=100.000, loss_scale=5.364e+32, optim_step_time=0.182, optim0_lr0=5.199e-05, train_time=2.708
+[gpub002:0/64] 2023-07-14 07:30:01,213 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub002:0/64] 2023-07-14 07:30:19,332 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 07:30:22,768 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 07:30:22,768 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub002:0/64] 2023-07-14 07:30:22,775 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 07:37:23,008 (trainer:732) INFO: 48epoch:train:2501-2600batch: iter_time=3.157, forward_time=0.175, loss_ctc=64.931, loss_att=49.988, acc=0.701, loss=54.471, backward_time=1.040, grad_norm=125.113, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.199e-05, train_time=9.166 +[gpub002:0/64] 2023-07-14 07:39:39,101 (trainer:732) INFO: 48epoch:train:2601-2700batch: iter_time=9.689e-05, forward_time=0.144, loss_ctc=77.176, loss_att=57.787, acc=0.708, loss=63.604, backward_time=1.027, grad_norm=129.649, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.198e-05, train_time=2.722 +[gpub002:0/64] 2023-07-14 07:41:55,336 (trainer:732) INFO: 48epoch:train:2701-2800batch: iter_time=1.280e-04, forward_time=0.147, loss_ctc=78.705, loss_att=57.307, acc=0.716, loss=63.727, backward_time=1.031, grad_norm=161.522, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.198e-05, train_time=2.724 +[gpub002:0/64] 2023-07-14 07:44:11,382 (trainer:732) INFO: 48epoch:train:2801-2900batch: iter_time=1.355e-04, forward_time=0.146, loss_ctc=71.666, loss_att=51.698, acc=0.722, loss=57.689, backward_time=1.029, grad_norm=143.465, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.197e-05, train_time=2.721 +[gpub002:0/64] 2023-07-14 07:46:27,152 (trainer:732) INFO: 48epoch:train:2901-3000batch: iter_time=1.246e-04, forward_time=0.147, loss_ctc=67.826, loss_att=52.816, acc=0.696, loss=57.319, backward_time=1.028, grad_norm=112.690, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.196e-05, train_time=2.715 +[gpub002:0/64] 2023-07-14 07:48:42,648 (trainer:732) INFO: 48epoch:train:3001-3100batch: iter_time=1.373e-04, forward_time=0.146, loss_ctc=62.490, loss_att=46.664, acc=0.714, loss=51.412, backward_time=1.026, grad_norm=110.321, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.196e-05, train_time=2.710 +[gpub002:0/64] 2023-07-14 07:50:58,216 (trainer:732) INFO: 48epoch:train:3101-3200batch: iter_time=1.433e-04, forward_time=0.147, loss_ctc=71.138, loss_att=53.409, acc=0.703, loss=58.728, backward_time=1.027, grad_norm=124.701, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.195e-05, train_time=2.711 +[gpub002:0/64] 2023-07-14 07:53:13,640 (trainer:732) INFO: 48epoch:train:3201-3300batch: iter_time=1.522e-04, forward_time=0.145, loss_ctc=70.454, loss_att=51.309, acc=0.718, loss=57.052, backward_time=1.025, grad_norm=137.045, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, 
optim0_lr0=5.195e-05, train_time=2.708 +[gpub002:0/64] 2023-07-14 07:54:01,692 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub002:0/64] 2023-07-14 07:54:19,955 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 07:54:23,377 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 07:54:23,377 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub002:0/64] 2023-07-14 07:54:23,383 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 07:59:50,081 (trainer:732) INFO: 48epoch:train:3301-3400batch: iter_time=1.291, forward_time=0.146, loss_ctc=67.251, loss_att=52.714, acc=0.689, loss=57.075, backward_time=1.040, grad_norm=162.610, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.194e-05, train_time=7.929 +[gpub002:0/64] 2023-07-14 08:02:06,659 (trainer:732) INFO: 48epoch:train:3401-3500batch: iter_time=1.254e-04, forward_time=0.147, loss_ctc=74.470, loss_att=50.103, acc=0.717, loss=57.413, backward_time=1.030, grad_norm=146.633, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.194e-05, train_time=2.731 +[gpub002:0/64] 2023-07-14 08:04:22,707 (trainer:732) INFO: 48epoch:train:3501-3600batch: iter_time=1.132e-04, forward_time=0.144, loss_ctc=78.766, loss_att=59.983, acc=0.709, loss=65.618, backward_time=1.031, grad_norm=139.024, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.193e-05, train_time=2.721 +[gpub002:0/64] 2023-07-14 08:06:38,451 (trainer:732) INFO: 48epoch:train:3601-3700batch: iter_time=9.308e-05, forward_time=0.144, loss_ctc=69.199, loss_att=49.395, acc=0.729, loss=55.336, backward_time=1.029, grad_norm=127.650, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.192e-05, train_time=2.715 +[gpub002:0/64] 2023-07-14 08:08:54,104 (trainer:732) INFO: 48epoch:train:3701-3800batch: iter_time=1.209e-04, forward_time=0.144, loss_ctc=71.796, loss_att=51.842, acc=0.698, loss=57.828, backward_time=1.028, grad_norm=149.795, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.192e-05, train_time=2.713 +[gpub002:0/64] 2023-07-14 08:09:50,838 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. 
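Note on the nan warnings: "The grad norm is nan. Skipping updating the model." together with the stepwise drops in `loss_scale` (the underlying values are powers of two, e.g. 6.490e+32 = 2^109 and 3.245e+32 = 2^108; intermediates like 5.364e+32 are averages over the 100-batch reporting window) is the signature of dynamic loss scaling in mixed-precision training: on a non-finite gradient the update is skipped and the scale is halved. A hedged sketch of that pattern using `torch.cuda.amp.GradScaler`, illustrative rather than the trainer's exact code; `model(**batch)` returning a scalar loss is an assumption:

```python
import torch

scaler = torch.cuda.amp.GradScaler()  # dynamic loss scaling

def train_step(model, batch, optimizer, max_grad_norm=100.0):
    optimizer.zero_grad()
    with torch.cuda.amp.autocast():
        loss = model(**batch)  # assumed to return a scalar loss
    scaler.scale(loss).backward()
    scaler.unscale_(optimizer)  # so the clip threshold applies to true grads
    grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
    if not torch.isfinite(grad_norm):
        # mirrors the trainer's warning seen in this log
        print("The grad norm is nan. Skipping updating the model.")
    scaler.step(optimizer)  # internally skips the step if grads are inf/nan
    scaler.update()         # halves the scale after a skipped step
```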
+[gpub002:0/64] 2023-07-14 08:11:09,674 (trainer:732) INFO: 48epoch:train:3801-3900batch: iter_time=1.016e-04, forward_time=0.144, loss_ctc=66.375, loss_att=50.166, acc=0.713, loss=55.029, backward_time=1.028, grad_norm=168.422, clip=100.000, loss_scale=2.285e+32, optim_step_time=0.182, optim0_lr0=5.191e-05, train_time=2.711 +[gpub002:0/64] 2023-07-14 08:13:25,076 (trainer:732) INFO: 48epoch:train:3901-4000batch: iter_time=1.125e-04, forward_time=0.143, loss_ctc=72.317, loss_att=55.151, acc=0.699, loss=60.301, backward_time=1.027, grad_norm=131.187, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.191e-05, train_time=2.708 +[gpub002:0/64] 2023-07-14 08:15:40,836 (trainer:732) INFO: 48epoch:train:4001-4100batch: iter_time=1.123e-04, forward_time=0.145, loss_ctc=69.182, loss_att=49.859, acc=0.719, loss=55.656, backward_time=1.029, grad_norm=144.542, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.190e-05, train_time=2.715 +[gpub002:0/64] 2023-07-14 08:17:43,061 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub002:0/64] 2023-07-14 08:18:01,046 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 08:18:04,458 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 08:18:04,458 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub002:0/64] 2023-07-14 08:18:04,464 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 08:23:10,162 (trainer:732) INFO: 48epoch:train:4101-4200batch: iter_time=2.902, forward_time=0.247, loss_ctc=68.952, loss_att=51.746, acc=0.708, loss=56.908, backward_time=1.052, grad_norm=116.379, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.220, optim0_lr0=5.190e-05, train_time=8.986 +[gpub002:0/64] 2023-07-14 08:25:28,112 (trainer:732) INFO: 48epoch:train:4201-4300batch: iter_time=1.154e-04, forward_time=0.148, loss_ctc=75.515, loss_att=51.753, acc=0.724, loss=58.882, backward_time=1.034, grad_norm=126.570, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.189e-05, train_time=2.759 +[gpub002:0/64] 2023-07-14 08:27:44,220 (trainer:732) INFO: 48epoch:train:4301-4400batch: iter_time=1.206e-04, forward_time=0.146, loss_ctc=74.155, loss_att=55.853, acc=0.722, loss=61.344, backward_time=1.031, grad_norm=127.353, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.189e-05, train_time=2.722 +[gpub002:0/64] 2023-07-14 08:30:00,444 (trainer:732) INFO: 48epoch:train:4401-4500batch: iter_time=1.105e-04, forward_time=0.146, loss_ctc=74.196, loss_att=52.771, acc=0.728, loss=59.198, backward_time=1.032, grad_norm=142.333, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.188e-05, train_time=2.724 +[gpub002:0/64] 2023-07-14 08:32:16,273 (trainer:732) INFO: 48epoch:train:4501-4600batch: iter_time=1.142e-04, forward_time=0.145, loss_ctc=69.251, loss_att=50.837, acc=0.715, loss=56.361, 
backward_time=1.029, grad_norm=131.558, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.187e-05, train_time=2.716 +[gpub002:0/64] 2023-07-14 08:34:31,949 (trainer:732) INFO: 48epoch:train:4601-4700batch: iter_time=1.235e-04, forward_time=0.146, loss_ctc=58.768, loss_att=43.134, acc=0.731, loss=47.825, backward_time=1.028, grad_norm=109.747, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.187e-05, train_time=2.713 +[gpub002:0/64] 2023-07-14 08:36:47,789 (trainer:732) INFO: 48epoch:train:4701-4800batch: iter_time=1.187e-04, forward_time=0.146, loss_ctc=71.818, loss_att=54.359, acc=0.714, loss=59.597, backward_time=1.029, grad_norm=139.913, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.186e-05, train_time=2.717 +[gpub002:0/64] 2023-07-14 08:39:03,632 (trainer:732) INFO: 48epoch:train:4801-4900batch: iter_time=1.235e-04, forward_time=0.146, loss_ctc=69.204, loss_att=51.952, acc=0.719, loss=57.128, backward_time=1.029, grad_norm=128.452, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.186e-05, train_time=2.717 +[gpub002:0/64] 2023-07-14 08:41:19,530 (trainer:732) INFO: 48epoch:train:4901-5000batch: iter_time=1.175e-04, forward_time=0.146, loss_ctc=76.342, loss_att=54.027, acc=0.704, loss=60.721, backward_time=1.030, grad_norm=125.370, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.185e-05, train_time=2.718 +[gpub002:0/64] 2023-07-14 08:41:22,274 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub002:0/64] 2023-07-14 08:41:40,677 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 08:41:44,120 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 08:41:44,120 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub002:0/64] 2023-07-14 08:41:44,127 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 08:47:46,295 (trainer:732) INFO: 48epoch:train:5001-5100batch: iter_time=1.324, forward_time=0.145, loss_ctc=64.448, loss_att=47.570, acc=0.724, loss=52.633, backward_time=1.044, grad_norm=141.410, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.185e-05, train_time=7.735 +[gpub002:0/64] 2023-07-14 08:50:02,881 (trainer:732) INFO: 48epoch:train:5101-5200batch: iter_time=8.791e-05, forward_time=0.143, loss_ctc=76.350, loss_att=54.968, acc=0.718, loss=61.383, backward_time=1.029, grad_norm=120.961, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.184e-05, train_time=2.731 +[gpub002:0/64] 2023-07-14 08:52:20,062 (trainer:732) INFO: 48epoch:train:5201-5300batch: iter_time=8.431e-05, forward_time=0.145, loss_ctc=78.698, loss_att=54.190, acc=0.731, loss=61.542, backward_time=1.034, grad_norm=145.742, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.184e-05, train_time=2.743 +[gpub002:0/64] 2023-07-14 08:54:36,566 (trainer:732) INFO: 
48epoch:train:5301-5400batch: iter_time=8.884e-05, forward_time=0.144, loss_ctc=71.014, loss_att=50.305, acc=0.733, loss=56.518, backward_time=1.031, grad_norm=149.754, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.183e-05, train_time=2.730 +[gpub002:0/64] 2023-07-14 08:56:54,010 (trainer:732) INFO: 48epoch:train:5401-5500batch: iter_time=1.200e-04, forward_time=0.145, loss_ctc=66.905, loss_att=50.167, acc=0.717, loss=55.189, backward_time=1.029, grad_norm=107.271, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.182e-05, train_time=2.749 +[gpub002:0/64] 2023-07-14 08:59:13,151 (trainer:732) INFO: 48epoch:train:5501-5600batch: iter_time=1.282e-04, forward_time=0.145, loss_ctc=62.122, loss_att=44.641, acc=0.726, loss=49.885, backward_time=1.028, grad_norm=133.648, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.182e-05, train_time=2.783 +[gpub002:0/64] 2023-07-14 09:01:33,741 (trainer:732) INFO: 48epoch:train:5601-5700batch: iter_time=1.289e-04, forward_time=0.146, loss_ctc=69.735, loss_att=53.019, acc=0.721, loss=58.034, backward_time=1.038, grad_norm=127.138, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.181e-05, train_time=2.812 +[gpub002:0/64] 2023-07-14 09:03:52,868 (trainer:732) INFO: 48epoch:train:5701-5800batch: iter_time=1.326e-04, forward_time=0.146, loss_ctc=69.218, loss_att=50.409, acc=0.725, loss=56.052, backward_time=1.035, grad_norm=115.986, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.181e-05, train_time=2.782 +[gpub002:0/64] 2023-07-14 09:04:41,018 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub002:0/64] 2023-07-14 09:04:59,726 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 09:05:03,191 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 09:05:03,191 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub002:0/64] 2023-07-14 09:05:03,197 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 09:11:40,827 (trainer:732) INFO: 48epoch:train:5801-5900batch: iter_time=3.228, forward_time=0.187, loss_ctc=64.451, loss_att=51.138, acc=0.698, loss=55.132, backward_time=1.041, grad_norm=126.975, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.180e-05, train_time=9.358 +[gpub002:0/64] 2023-07-14 09:13:57,967 (trainer:732) INFO: 48epoch:train:5901-6000batch: iter_time=1.175e-04, forward_time=0.144, loss_ctc=74.538, loss_att=51.602, acc=0.714, loss=58.483, backward_time=1.028, grad_norm=138.164, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.186, optim0_lr0=5.180e-05, train_time=2.743 +[gpub002:0/64] 2023-07-14 09:16:16,645 (trainer:732) INFO: 48epoch:train:6001-6100batch: iter_time=7.038e-04, forward_time=0.146, loss_ctc=78.354, loss_att=60.750, acc=0.709, loss=66.031, backward_time=1.033, grad_norm=141.686, clip=100.000, 
loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.179e-05, train_time=2.773 +[gpub002:0/64] 2023-07-14 09:18:33,621 (trainer:732) INFO: 48epoch:train:6101-6200batch: iter_time=1.237e-04, forward_time=0.146, loss_ctc=69.005, loss_att=49.078, acc=0.732, loss=55.056, backward_time=1.031, grad_norm=107.431, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.188, optim0_lr0=5.179e-05, train_time=2.739 +[gpub002:0/64] 2023-07-14 09:20:49,845 (trainer:732) INFO: 48epoch:train:6201-6300batch: iter_time=1.292e-04, forward_time=0.145, loss_ctc=71.735, loss_att=50.224, acc=0.706, loss=56.677, backward_time=1.031, grad_norm=145.134, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.186, optim0_lr0=5.178e-05, train_time=2.724 +[gpub002:0/64] 2023-07-14 09:23:24,114 (trainer:732) INFO: 48epoch:train:6301-6400batch: iter_time=1.285e-04, forward_time=0.283, loss_ctc=66.846, loss_att=50.323, acc=0.714, loss=55.280, backward_time=1.045, grad_norm=114.520, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.189, optim0_lr0=5.177e-05, train_time=3.085 +[gpub002:0/64] 2023-07-14 09:25:40,456 (trainer:732) INFO: 48epoch:train:6401-6500batch: iter_time=1.196e-04, forward_time=0.145, loss_ctc=72.179, loss_att=54.681, acc=0.704, loss=59.930, backward_time=1.029, grad_norm=148.277, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.177e-05, train_time=2.727 +[gpub002:0/64] 2023-07-14 09:28:20,556 (trainer:732) INFO: 48epoch:train:6501-6600batch: iter_time=1.396e-04, forward_time=0.146, loss_ctc=67.464, loss_att=48.020, acc=0.724, loss=53.853, backward_time=1.113, grad_norm=125.636, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.176e-05, train_time=3.202 +[gpub002:0/64] 2023-07-14 09:30:11,734 (multiple_iter_factory:32) INFO: Building 8th iter-factory... 
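Note on the iter-factory messages: with `--multiple_iterator true` the training data was pre-split into 12 shards (`splits12/.../split.0` through `split.11`), and each `Building Nth iter-factory...` line starts a fresh loader over one shard. That is also why the first batch after each build shows a large `iter_time` (seconds, versus ~1e-4 within a shard): it pays the loader spin-up cost. A conceptual sketch of this epoch structure; names are illustrative, not the ESPnet API, and per-epoch shard shuffling is assumed:

```python
import random

# Conceptual sketch of a multiple-iterator epoch: one iter-factory
# (data loader) per pre-split shard, built lazily so only one shard's
# index is resident at a time.
def epoch_batches(build_loader_for_shard, num_shards=12, seed=0):
    shard_order = list(range(num_shards))
    random.Random(seed).shuffle(shard_order)  # assumed per-epoch shuffle
    for shard in shard_order:
        loader = build_loader_for_shard(shard)  # "Building Nth iter-factory..."
        yield from loader  # first batch pays the spin-up cost (large iter_time)
```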
+[gpub002:0/64] 2023-07-14 09:30:29,893 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 09:30:33,364 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 09:30:33,364 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub002:0/64] 2023-07-14 09:30:33,370 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 09:34:29,820 (trainer:732) INFO: 48epoch:train:6601-6700batch: iter_time=2.210, forward_time=0.145, loss_ctc=65.650, loss_att=48.209, acc=0.712, loss=53.441, backward_time=1.036, grad_norm=123.400, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.176e-05, train_time=7.385 +[gpub002:0/64] 2023-07-14 09:36:46,637 (trainer:732) INFO: 48epoch:train:6701-6800batch: iter_time=1.166e-04, forward_time=0.146, loss_ctc=68.915, loss_att=49.606, acc=0.726, loss=55.399, backward_time=1.031, grad_norm=124.451, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.175e-05, train_time=2.736 +[gpub002:0/64] 2023-07-14 09:39:03,225 (trainer:732) INFO: 48epoch:train:6801-6900batch: iter_time=1.266e-04, forward_time=0.146, loss_ctc=79.366, loss_att=60.971, acc=0.714, loss=66.490, backward_time=1.031, grad_norm=177.915, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.175e-05, train_time=2.732 +[gpub002:0/64] 2023-07-14 09:41:26,301 (trainer:732) INFO: 48epoch:train:6901-7000batch: iter_time=1.171e-04, forward_time=0.145, loss_ctc=71.703, loss_att=47.655, acc=0.738, loss=54.869, backward_time=1.033, grad_norm=118.979, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.185, optim0_lr0=5.174e-05, train_time=2.861 +[gpub002:0/64] 2023-07-14 09:43:47,692 (trainer:732) INFO: 48epoch:train:7001-7100batch: iter_time=1.210e-04, forward_time=0.145, loss_ctc=74.135, loss_att=51.319, acc=0.722, loss=58.164, backward_time=1.037, grad_norm=130.583, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.174e-05, train_time=2.828 +[gpub002:0/64] 2023-07-14 09:46:22,408 (trainer:732) INFO: 48epoch:train:7101-7200batch: iter_time=1.192e-04, forward_time=0.145, loss_ctc=61.004, loss_att=45.940, acc=0.727, loss=50.459, backward_time=1.063, grad_norm=116.241, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.173e-05, train_time=3.094 +[gpub002:0/64] 2023-07-14 09:48:38,387 (trainer:732) INFO: 48epoch:train:7201-7300batch: iter_time=1.124e-04, forward_time=0.146, loss_ctc=71.858, loss_att=52.784, acc=0.722, loss=58.507, backward_time=1.030, grad_norm=147.551, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.172e-05, train_time=2.720 +[gpub002:0/64] 2023-07-14 09:51:09,299 (trainer:732) INFO: 48epoch:train:7301-7400batch: iter_time=1.101e-04, forward_time=0.147, loss_ctc=69.932, loss_att=48.151, acc=0.732, loss=54.685, backward_time=1.064, grad_norm=130.713, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, 
optim0_lr0=5.172e-05, train_time=3.018 +[gpub002:0/64] 2023-07-14 09:53:26,312 (trainer:732) INFO: 48epoch:train:7401-7500batch: iter_time=1.054e-04, forward_time=0.146, loss_ctc=67.229, loss_att=50.273, acc=0.714, loss=55.360, backward_time=1.031, grad_norm=147.697, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.171e-05, train_time=2.740 +[gpub002:0/64] 2023-07-14 09:53:28,551 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpub002:0/64] 2023-07-14 09:53:46,672 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 09:53:50,086 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 09:53:50,086 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub002:0/64] 2023-07-14 09:53:50,092 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 10:00:46,448 (trainer:732) INFO: 48epoch:train:7501-7600batch: iter_time=1.330, forward_time=0.146, loss_ctc=64.829, loss_att=48.046, acc=0.710, loss=53.081, backward_time=1.043, grad_norm=121.945, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.171e-05, train_time=8.803 +[gpub002:0/64] 2023-07-14 10:03:03,225 (trainer:732) INFO: 48epoch:train:7601-7700batch: iter_time=1.167e-04, forward_time=0.145, loss_ctc=73.763, loss_att=54.433, acc=0.716, loss=60.232, backward_time=1.031, grad_norm=121.388, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.170e-05, train_time=2.735 +[gpub002:0/64] 2023-07-14 10:05:19,335 (trainer:732) INFO: 48epoch:train:7701-7800batch: iter_time=1.329e-04, forward_time=0.145, loss_ctc=77.979, loss_att=56.628, acc=0.718, loss=63.034, backward_time=1.030, grad_norm=145.939, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.170e-05, train_time=2.722 +[gpub002:0/64] 2023-07-14 10:07:35,239 (trainer:732) INFO: 48epoch:train:7801-7900batch: iter_time=1.459e-04, forward_time=0.144, loss_ctc=70.685, loss_att=50.240, acc=0.729, loss=56.374, backward_time=1.030, grad_norm=121.249, clip=100.000, loss_scale=2.564e+32, optim_step_time=0.182, optim0_lr0=5.169e-05, train_time=2.718 +[gpub002:0/64] 2023-07-14 10:10:05,010 (trainer:732) INFO: 48epoch:train:7901-8000batch: iter_time=1.328e-04, forward_time=0.239, loss_ctc=67.160, loss_att=50.782, acc=0.700, loss=55.695, backward_time=1.051, grad_norm=128.964, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.188, optim0_lr0=5.169e-05, train_time=2.995 +[gpub002:0/64] 2023-07-14 10:12:20,278 (trainer:732) INFO: 48epoch:train:8001-8100batch: iter_time=1.339e-04, forward_time=0.144, loss_ctc=63.723, loss_att=47.622, acc=0.712, loss=52.453, backward_time=1.025, grad_norm=112.150, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.168e-05, train_time=2.705 +[gpub002:0/64] 2023-07-14 10:14:36,085 (trainer:732) INFO: 48epoch:train:8101-8200batch: iter_time=1.193e-04, forward_time=0.144, loss_ctc=74.529, 
loss_att=54.062, acc=0.708, loss=60.202, backward_time=1.027, grad_norm=108.241, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.167e-05, train_time=2.716 +[gpub002:0/64] 2023-07-14 10:16:52,011 (trainer:732) INFO: 48epoch:train:8201-8300batch: iter_time=1.164e-04, forward_time=0.145, loss_ctc=68.909, loss_att=49.305, acc=0.729, loss=55.186, backward_time=1.028, grad_norm=144.843, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.167e-05, train_time=2.718 +[gpub002:0/64] 2023-07-14 10:17:38,915 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub002:0/64] 2023-07-14 10:17:57,252 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 10:18:00,656 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 10:18:00,656 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub002:0/64] 2023-07-14 10:18:00,663 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 10:22:44,622 (trainer:732) INFO: 48epoch:train:8301-8400batch: iter_time=2.087, forward_time=0.144, loss_ctc=66.298, loss_att=48.900, acc=0.708, loss=54.119, backward_time=1.041, grad_norm=129.888, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.166e-05, train_time=7.052 +[gpub002:0/64] 2023-07-14 10:25:04,181 (trainer:732) INFO: 48epoch:train:8401-8500batch: iter_time=1.242e-04, forward_time=0.146, loss_ctc=79.695, loss_att=59.887, acc=0.715, loss=65.829, backward_time=1.030, grad_norm=136.298, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.166e-05, train_time=2.791 +[gpub002:0/64] 2023-07-14 10:27:20,855 (trainer:732) INFO: 48epoch:train:8501-8600batch: iter_time=1.298e-04, forward_time=0.145, loss_ctc=73.605, loss_att=52.399, acc=0.724, loss=58.761, backward_time=1.029, grad_norm=129.008, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.165e-05, train_time=2.733 +[gpub002:0/64] 2023-07-14 10:29:36,805 (trainer:732) INFO: 48epoch:train:8601-8700batch: iter_time=1.263e-04, forward_time=0.145, loss_ctc=73.931, loss_att=52.596, acc=0.720, loss=58.997, backward_time=1.027, grad_norm=123.600, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.165e-05, train_time=2.719 +[gpub002:0/64] 2023-07-14 10:31:52,421 (trainer:732) INFO: 48epoch:train:8701-8800batch: iter_time=1.245e-04, forward_time=0.145, loss_ctc=62.565, loss_att=48.800, acc=0.699, loss=52.930, backward_time=1.025, grad_norm=111.238, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.164e-05, train_time=2.712 +[gpub002:0/64] 2023-07-14 10:34:08,232 (trainer:732) INFO: 48epoch:train:8801-8900batch: iter_time=1.307e-04, forward_time=0.145, loss_ctc=71.599, loss_att=54.096, acc=0.705, loss=59.347, backward_time=1.027, grad_norm=114.515, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.164e-05, train_time=2.716 +[gpub002:0/64] 
2023-07-14 10:36:23,781 (trainer:732) INFO: 48epoch:train:8901-9000batch: iter_time=1.220e-04, forward_time=0.145, loss_ctc=67.382, loss_att=47.060, acc=0.724, loss=53.156, backward_time=1.026, grad_norm=109.344, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.163e-05, train_time=2.711 +[gpub002:0/64] 2023-07-14 10:38:39,320 (trainer:732) INFO: 48epoch:train:9001-9100batch: iter_time=1.261e-04, forward_time=0.144, loss_ctc=65.844, loss_att=47.748, acc=0.721, loss=53.177, backward_time=1.027, grad_norm=117.156, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.163e-05, train_time=2.711 +[gpub002:0/64] 2023-07-14 10:40:20,761 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub002:0/64] 2023-07-14 10:40:38,784 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 10:40:42,509 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 10:40:42,509 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub002:0/64] 2023-07-14 10:40:42,516 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 10:44:51,880 (trainer:732) INFO: 48epoch:train:9101-9200batch: iter_time=2.166, forward_time=0.173, loss_ctc=76.442, loss_att=55.319, acc=0.703, loss=61.656, backward_time=1.038, grad_norm=122.819, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.162e-05, train_time=7.451 +[gpub002:0/64] 2023-07-14 10:47:08,550 (trainer:732) INFO: 48epoch:train:9201-9300batch: iter_time=1.199e-04, forward_time=0.144, loss_ctc=68.780, loss_att=49.932, acc=0.724, loss=55.586, backward_time=1.031, grad_norm=107.542, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.161e-05, train_time=2.733 +[gpub002:0/64] 2023-07-14 10:49:26,299 (trainer:732) INFO: 48epoch:train:9301-9400batch: iter_time=1.263e-04, forward_time=0.146, loss_ctc=79.867, loss_att=61.875, acc=0.714, loss=67.273, backward_time=1.031, grad_norm=156.270, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.161e-05, train_time=2.755 +[gpub002:0/64] 2023-07-14 10:51:42,933 (trainer:732) INFO: 48epoch:train:9401-9500batch: iter_time=1.196e-04, forward_time=0.145, loss_ctc=72.470, loss_att=48.680, acc=0.737, loss=55.817, backward_time=1.029, grad_norm=122.617, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.160e-05, train_time=2.732 +[gpub002:0/64] 2023-07-14 10:53:58,937 (trainer:732) INFO: 48epoch:train:9501-9600batch: iter_time=1.210e-04, forward_time=0.146, loss_ctc=70.022, loss_att=50.699, acc=0.724, loss=56.496, backward_time=1.029, grad_norm=132.905, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.160e-05, train_time=2.720 +[gpub002:0/64] 2023-07-14 10:56:14,682 (trainer:732) INFO: 48epoch:train:9601-9700batch: iter_time=1.227e-04, forward_time=0.145, loss_ctc=60.015, loss_att=45.859, acc=0.727, loss=50.106, backward_time=1.028, 
grad_norm=143.949, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.159e-05, train_time=2.715 +[gpub002:0/64] 2023-07-14 10:58:33,862 (trainer:732) INFO: 48epoch:train:9701-9800batch: iter_time=1.277e-04, forward_time=0.146, loss_ctc=70.164, loss_att=53.081, acc=0.720, loss=58.206, backward_time=1.036, grad_norm=114.650, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.159e-05, train_time=2.783 +[gpub002:0/64] 2023-07-14 11:00:52,286 (trainer:732) INFO: 48epoch:train:9801-9900batch: iter_time=1.221e-04, forward_time=0.147, loss_ctc=67.592, loss_att=47.935, acc=0.733, loss=53.832, backward_time=1.034, grad_norm=113.911, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.158e-05, train_time=2.768 +[gpub002:0/64] 2023-07-14 11:03:07,869 (trainer:732) INFO: 48epoch:train:9901-10000batch: iter_time=1.237e-04, forward_time=0.145, loss_ctc=66.172, loss_att=49.889, acc=0.716, loss=54.774, backward_time=1.028, grad_norm=112.218, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.158e-05, train_time=2.711 +[gpub002:0/64] 2023-07-14 11:17:21,394 (trainer:338) INFO: 48epoch results: [train] iter_time=0.297, forward_time=0.153, loss_ctc=70.855, loss_att=51.822, acc=0.716, loss=57.532, backward_time=1.033, grad_norm=130.378, clip=100.000, loss_scale=2.805e+32, optim_step_time=0.183, optim0_lr0=5.185e-05, train_time=3.465, time=4 hours, 49 minutes and 0.35 seconds, total_count=450000, gpu_max_cached_mem_GB=37.574, [valid] loss_ctc=42.674, cer_ctc=0.250, loss_att=37.042, acc=0.682, cer=0.410, wer=0.996, loss=38.731, time=7 minutes and 58.92 seconds, total_count=46046, gpu_max_cached_mem_GB=37.574, [att_plot] time=6 minutes and 0.19 seconds, total_count=0, gpu_max_cached_mem_GB=37.574 +[gpub002:0/64] 2023-07-14 11:17:37,205 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpub002:0/64] 2023-07-14 11:17:37,216 (trainer:272) INFO: 49/50epoch started. Estimated time to finish: 10 hours, 12 minutes and 51.12 seconds +[gpub002:0/64] 2023-07-14 11:17:37,220 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
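Note on the learning rate: the config name encodes a peak lr of 2.5e-4 with 10k warmup steps, and the slow decay of `optim0_lr0` across these lines (~5.2e-05 by epoch 48) is consistent with espnet2's `WarmupLR` scheduler, which warms up to the base lr and then decays proportionally to step^-0.5. A sketch of that rule; the step count passed below is illustrative, since the log reports only the resulting lr:

```python
def warmup_lr(step: int, base_lr: float = 2.5e-4, warmup_steps: int = 10_000) -> float:
    # WarmupLR rule: ramp to base_lr at `warmup_steps`, then ~ step**-0.5 decay.
    return base_lr * warmup_steps**0.5 * min(step**-0.5, step * warmup_steps**-1.5)

print(warmup_lr(10_000))   # 2.5e-4 at the end of warmup
print(warmup_lr(230_000))  # ~5.2e-05, the regime seen in this log
```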
+[gpub002:0/64] 2023-07-14 11:17:55,035 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 11:17:58,436 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 11:17:58,436 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub002:0/64] 2023-07-14 11:17:58,442 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 11:23:36,393 (trainer:732) INFO: 49epoch:train:1-100batch: iter_time=1.992, forward_time=0.180, loss_ctc=75.424, loss_att=56.205, acc=0.707, loss=61.970, backward_time=1.067, grad_norm=126.161, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.186, optim0_lr0=5.157e-05, train_time=7.183 +[gpub002:0/64] 2023-07-14 11:25:52,971 (trainer:732) INFO: 49epoch:train:101-200batch: iter_time=1.338e-04, forward_time=0.145, loss_ctc=78.109, loss_att=58.369, acc=0.696, loss=64.291, backward_time=1.030, grad_norm=156.563, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.156e-05, train_time=2.732 +[gpub002:0/64] 2023-07-14 11:28:10,045 (trainer:732) INFO: 49epoch:train:201-300batch: iter_time=1.411e-04, forward_time=0.145, loss_ctc=71.274, loss_att=53.833, acc=0.706, loss=59.065, backward_time=1.031, grad_norm=117.395, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.156e-05, train_time=2.741 +[gpub002:0/64] 2023-07-14 11:30:33,978 (trainer:732) INFO: 49epoch:train:301-400batch: iter_time=1.382e-04, forward_time=0.143, loss_ctc=82.935, loss_att=67.130, acc=0.686, loss=71.872, backward_time=1.041, grad_norm=143.181, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.155e-05, train_time=2.878 +[gpub002:0/64] 2023-07-14 11:33:11,725 (trainer:732) INFO: 49epoch:train:401-500batch: iter_time=1.370e-04, forward_time=0.144, loss_ctc=67.558, loss_att=49.800, acc=0.725, loss=55.128, backward_time=1.047, grad_norm=137.364, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.155e-05, train_time=3.155 +[gpub002:0/64] 2023-07-14 11:35:47,703 (trainer:732) INFO: 49epoch:train:501-600batch: iter_time=1.246e-04, forward_time=0.146, loss_ctc=67.201, loss_att=46.260, acc=0.720, loss=52.542, backward_time=1.045, grad_norm=114.336, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.154e-05, train_time=3.119 +[gpub002:0/64] 2023-07-14 11:38:18,577 (trainer:732) INFO: 49epoch:train:601-700batch: iter_time=1.325e-04, forward_time=0.150, loss_ctc=70.668, loss_att=51.546, acc=0.714, loss=57.283, backward_time=1.040, grad_norm=119.406, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.154e-05, train_time=3.017 +[gpub002:0/64] 2023-07-14 11:40:45,549 (trainer:732) INFO: 49epoch:train:701-800batch: iter_time=1.343e-04, forward_time=0.144, loss_ctc=61.423, loss_att=43.823, acc=0.717, loss=49.103, backward_time=1.037, grad_norm=107.937, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.153e-05, 
train_time=2.939 +[gpub002:0/64] 2023-07-14 11:41:43,214 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub002:0/64] 2023-07-14 11:42:01,107 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 11:42:04,459 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 11:42:04,459 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub002:0/64] 2023-07-14 11:42:04,480 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 11:48:17,184 (trainer:732) INFO: 49epoch:train:801-900batch: iter_time=2.782, forward_time=0.203, loss_ctc=83.766, loss_att=63.502, acc=0.709, loss=69.582, backward_time=1.053, grad_norm=161.753, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.187, optim0_lr0=5.153e-05, train_time=9.032 +[gpub002:0/64] 2023-07-14 11:50:46,902 (trainer:732) INFO: 49epoch:train:901-1000batch: iter_time=9.525e-05, forward_time=0.144, loss_ctc=74.996, loss_att=53.416, acc=0.702, loss=59.890, backward_time=1.043, grad_norm=131.720, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.152e-05, train_time=2.994 +[gpub002:0/64] 2023-07-14 11:53:03,014 (trainer:732) INFO: 49epoch:train:1001-1100batch: iter_time=8.920e-05, forward_time=0.144, loss_ctc=71.753, loss_att=56.326, acc=0.702, loss=60.954, backward_time=1.032, grad_norm=130.090, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.152e-05, train_time=2.722 +[gpub002:0/64] 2023-07-14 11:55:18,875 (trainer:732) INFO: 49epoch:train:1101-1200batch: iter_time=1.102e-04, forward_time=0.143, loss_ctc=79.119, loss_att=61.363, acc=0.706, loss=66.690, backward_time=1.030, grad_norm=115.657, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.151e-05, train_time=2.717 +[gpub002:0/64] 2023-07-14 11:57:34,399 (trainer:732) INFO: 49epoch:train:1201-1300batch: iter_time=1.012e-04, forward_time=0.144, loss_ctc=72.908, loss_att=53.208, acc=0.712, loss=59.118, backward_time=1.028, grad_norm=115.476, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.150e-05, train_time=2.710 +[gpub002:0/64] 2023-07-14 11:59:49,721 (trainer:732) INFO: 49epoch:train:1301-1400batch: iter_time=1.097e-04, forward_time=0.142, loss_ctc=61.698, loss_att=43.601, acc=0.720, loss=49.030, backward_time=1.027, grad_norm=118.028, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.150e-05, train_time=2.706 +[gpub002:0/64] 2023-07-14 12:02:24,426 (trainer:732) INFO: 49epoch:train:1401-1500batch: iter_time=0.010, forward_time=0.243, loss_ctc=69.799, loss_att=51.631, acc=0.718, loss=57.082, backward_time=1.055, grad_norm=137.188, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.201, optim0_lr0=5.149e-05, train_time=3.093 +[gpub002:0/64] 2023-07-14 12:04:40,243 (trainer:732) INFO: 49epoch:train:1501-1600batch: iter_time=1.270e-04, forward_time=0.145, loss_ctc=62.191, loss_att=43.167, acc=0.720, loss=48.874, 
backward_time=1.028, grad_norm=110.624, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.149e-05, train_time=2.717 +[gpub002:0/64] 2023-07-14 12:06:21,070 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub002:0/64] 2023-07-14 12:06:39,230 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 12:06:42,623 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 12:06:42,623 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub002:0/64] 2023-07-14 12:06:42,629 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 12:10:28,549 (trainer:732) INFO: 49epoch:train:1601-1700batch: iter_time=1.995, forward_time=0.145, loss_ctc=86.888, loss_att=63.993, acc=0.702, loss=70.861, backward_time=1.042, grad_norm=144.375, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.148e-05, train_time=6.966 +[gpub002:0/64] 2023-07-14 12:12:46,571 (trainer:732) INFO: 49epoch:train:1701-1800batch: iter_time=1.248e-04, forward_time=0.146, loss_ctc=72.288, loss_att=55.796, acc=0.708, loss=60.743, backward_time=1.033, grad_norm=127.355, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.148e-05, train_time=2.760 +[gpub002:0/64] 2023-07-14 12:15:04,541 (trainer:732) INFO: 49epoch:train:1801-1900batch: iter_time=1.254e-04, forward_time=0.146, loss_ctc=72.461, loss_att=50.750, acc=0.721, loss=57.264, backward_time=1.031, grad_norm=124.107, clip=100.000, loss_scale=5.127e+32, optim_step_time=0.182, optim0_lr0=5.147e-05, train_time=2.759 +[gpub002:0/64] 2023-07-14 12:17:21,880 (trainer:732) INFO: 49epoch:train:1901-2000batch: iter_time=1.228e-04, forward_time=0.146, loss_ctc=77.931, loss_att=60.068, acc=0.711, loss=65.427, backward_time=1.031, grad_norm=134.506, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.147e-05, train_time=2.747 +[gpub002:0/64] 2023-07-14 12:19:44,196 (trainer:732) INFO: 49epoch:train:2001-2100batch: iter_time=1.327e-04, forward_time=0.145, loss_ctc=74.295, loss_att=58.950, acc=0.728, loss=63.554, backward_time=1.047, grad_norm=139.921, clip=100.000, loss_scale=6.490e+32, optim_step_time=0.182, optim0_lr0=5.146e-05, train_time=2.846 +[gpub002:0/64] 2023-07-14 12:21:47,756 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. 
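Note on the batch sampler: `UnsortedBatchSampler(N-batch=37994, batch_size=128, ...)` describes fixed-size batching in key-file order (no length sorting), with the remainder spread over a few batches, which is why the summary reports `mean=128.0, min=128, max=129`. A conceptual sketch of that batching, not the exact ESPnet implementation:

```python
def unsorted_batches(keys, batch_size=128):
    # Cut keys into len(keys)//batch_size batches in file order; the
    # remainder is spread over the first batches, so a few batches hold
    # one extra item (hence min=128, max=129, mean=128.0 in the log).
    n_batch = max(len(keys) // batch_size, 1)
    base, extra = divmod(len(keys), n_batch)
    batches, start = [], 0
    for i in range(n_batch):
        size = base + (1 if i < extra else 0)
        batches.append(keys[start:start + size])
        start += size
    return batches
```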
+[gpub002:0/64] 2023-07-14 12:22:28,541 (trainer:732) INFO: 49epoch:train:2101-2200batch: iter_time=2.902e-04, forward_time=0.195, loss_ctc=66.147, loss_att=47.940, acc=0.731, loss=53.402, backward_time=1.122, grad_norm=127.949, clip=100.000, loss_scale=5.497e+32, optim_step_time=0.196, optim0_lr0=5.146e-05, train_time=3.286 +[gpub002:0/64] 2023-07-14 12:24:44,739 (trainer:732) INFO: 49epoch:train:2201-2300batch: iter_time=1.043e-04, forward_time=0.146, loss_ctc=63.495, loss_att=43.732, acc=0.732, loss=49.661, backward_time=1.028, grad_norm=107.717, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.145e-05, train_time=2.724 +[gpub002:0/64] 2023-07-14 12:27:00,588 (trainer:732) INFO: 49epoch:train:2301-2400batch: iter_time=1.223e-04, forward_time=0.146, loss_ctc=70.905, loss_att=53.186, acc=0.722, loss=58.502, backward_time=1.028, grad_norm=119.707, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.144e-05, train_time=2.717 +[gpub002:0/64] 2023-07-14 12:29:16,288 (trainer:732) INFO: 49epoch:train:2401-2500batch: iter_time=1.233e-04, forward_time=0.146, loss_ctc=71.254, loss_att=49.884, acc=0.724, loss=56.295, backward_time=1.028, grad_norm=137.105, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.144e-05, train_time=2.714 +[gpub002:0/64] 2023-07-14 12:29:36,316 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub002:0/64] 2023-07-14 12:29:55,262 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 12:29:58,726 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 12:29:58,726 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub002:0/64] 2023-07-14 12:29:58,784 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 12:36:52,242 (trainer:732) INFO: 49epoch:train:2501-2600batch: iter_time=3.018, forward_time=0.183, loss_ctc=77.934, loss_att=56.931, acc=0.709, loss=63.232, backward_time=1.042, grad_norm=160.805, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.143e-05, train_time=9.116 +[gpub002:0/64] 2023-07-14 12:39:09,252 (trainer:732) INFO: 49epoch:train:2601-2700batch: iter_time=1.261e-04, forward_time=0.144, loss_ctc=76.698, loss_att=56.198, acc=0.715, loss=62.348, backward_time=1.031, grad_norm=159.881, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.143e-05, train_time=2.743 +[gpub002:0/64] 2023-07-14 12:41:25,308 (trainer:732) INFO: 49epoch:train:2701-2800batch: iter_time=1.213e-04, forward_time=0.145, loss_ctc=70.984, loss_att=50.376, acc=0.725, loss=56.558, backward_time=1.031, grad_norm=114.170, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.142e-05, train_time=2.721 +[gpub002:0/64] 2023-07-14 12:43:41,674 (trainer:732) INFO: 49epoch:train:2801-2900batch: iter_time=1.222e-04, forward_time=0.145, loss_ctc=80.056, loss_att=64.744, acc=0.711, loss=69.337, 
backward_time=1.033, grad_norm=127.411, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.142e-05, train_time=2.727 +[gpub002:0/64] 2023-07-14 12:45:57,246 (trainer:732) INFO: 49epoch:train:2901-3000batch: iter_time=1.143e-04, forward_time=0.144, loss_ctc=67.599, loss_att=50.304, acc=0.735, loss=55.492, backward_time=1.027, grad_norm=117.099, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.141e-05, train_time=2.711 +[gpub002:0/64] 2023-07-14 12:48:13,149 (trainer:732) INFO: 49epoch:train:3001-3100batch: iter_time=1.120e-04, forward_time=0.146, loss_ctc=64.905, loss_att=44.347, acc=0.736, loss=50.514, backward_time=1.030, grad_norm=130.061, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.141e-05, train_time=2.718 +[gpub002:0/64] 2023-07-14 12:50:30,467 (trainer:732) INFO: 49epoch:train:3101-3200batch: iter_time=1.131e-04, forward_time=0.145, loss_ctc=68.429, loss_att=50.737, acc=0.731, loss=56.045, backward_time=1.028, grad_norm=111.368, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.140e-05, train_time=2.746 +[gpub002:0/64] 2023-07-14 12:52:46,238 (trainer:732) INFO: 49epoch:train:3201-3300batch: iter_time=1.227e-04, forward_time=0.144, loss_ctc=62.564, loss_att=44.638, acc=0.725, loss=50.016, backward_time=1.028, grad_norm=110.576, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.140e-05, train_time=2.715 +[gpub002:0/64] 2023-07-14 12:53:46,523 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub002:0/64] 2023-07-14 12:54:05,029 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub002:0/64] 2023-07-14 12:54:08,395 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub002:0/64] 2023-07-14 12:54:08,395 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub002:0/64] 2023-07-14 12:54:08,401 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub002:0/64] 2023-07-14 12:59:16,516 (trainer:732) INFO: 49epoch:train:3301-3400batch: iter_time=1.682, forward_time=0.171, loss_ctc=82.313, loss_att=58.467, acc=0.718, loss=65.621, backward_time=1.042, grad_norm=192.097, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.139e-05, train_time=7.805 +[gpub002:0/64] 2023-07-14 13:02:14,955 (trainer:732) INFO: 49epoch:train:3401-3500batch: iter_time=1.222e-04, forward_time=0.145, loss_ctc=72.811, loss_att=56.064, acc=0.705, loss=61.088, backward_time=1.086, grad_norm=128.058, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.138e-05, train_time=3.569 +[gpub002:0/64] 2023-07-14 13:05:10,836 (trainer:732) INFO: 49epoch:train:3501-3600batch: iter_time=1.228e-04, forward_time=0.146, loss_ctc=70.741, loss_att=51.039, acc=0.720, loss=56.950, backward_time=1.079, grad_norm=144.537, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.138e-05, train_time=3.517 +[gpub002:0/64] 2023-07-14 13:07:59,359 (trainer:732) INFO: 
49epoch:train:3601-3700batch: iter_time=1.242e-04, forward_time=0.144, loss_ctc=82.023, loss_att=65.980, acc=0.690, loss=70.793, backward_time=1.065, grad_norm=119.668, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.137e-05, train_time=3.370 +[gpub002:0/64] 2023-07-14 13:10:33,803 (trainer:732) INFO: 49epoch:train:3701-3800batch: iter_time=1.146e-04, forward_time=0.145, loss_ctc=68.382, loss_att=49.269, acc=0.731, loss=55.003, backward_time=1.040, grad_norm=119.342, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.137e-05, train_time=3.089 +[gpub002:0/64] 2023-07-14 13:13:52,522 (trainer:732) INFO: 49epoch:train:3801-3900batch: iter_time=1.390e-04, forward_time=0.146, loss_ctc=70.071, loss_att=52.760, acc=0.711, loss=57.953, backward_time=1.081, grad_norm=133.837, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.136e-05, train_time=3.974 +srun: Job step aborted: Waiting up to 32 seconds for job step to finish. +slurmstepd: error: *** STEP 2147805.0 ON gpub002 CANCELLED AT 2023-07-14T13:15:07 DUE TO TIME LIMIT *** diff --git a/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.4.log b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.4.log new file mode 100644 index 0000000000000000000000000000000000000000..eb703b4bb0ecf382b5cb4706b9999ea940688a82 --- /dev/null +++ b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.4.log @@ -0,0 +1,4562 @@ +# Running on gpub007.delta.ncsa.illinois.edu +# Started at Mon Jul 10 05:11:28 CDT 2023 +# SLURMD_NODENAME=gpub007 +# SLURM_CLUSTER_NAME=delta +# SLURM_CONF=/var/spool/slurmd/conf-cache/slurm.conf +# SLURM_CPUS_ON_NODE=64 +# SLURM_CPUS_PER_TASK=64 +# SLURM_EXPORT_ENV=PATH +# SLURM_GET_USER_ENV=1 +# SLURM_GPUS_ON_NODE=4 +# SLURM_GTIDS=0 +# SLURM_JOBID=2141292 +# SLURM_JOB_ACCOUNT=bbjs-delta-gpu +# SLURM_JOB_CPUS_PER_NODE='64(x16)' +# SLURM_JOB_GID=202 +# SLURM_JOB_GPUS=0,1,2,3 +# SLURM_JOB_ID=2141292 +# SLURM_JOB_NAME=exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.log +# SLURM_JOB_NODELIST='gpub[007,021-024,037,050,052-054,066,073-075,078,091]' +# SLURM_JOB_NUM_NODES=16 +# SLURM_JOB_PARTITION=gpuA40x4 +# SLURM_JOB_QOS=bbjs-delta-gpu +# SLURM_JOB_UID=68077 +# SLURM_JOB_USER=peng6 +# SLURM_LOCALID=0 +# SLURM_MEM_PER_NODE=240000 +# SLURM_NNODES=16 +# SLURM_NODEID=0 +# SLURM_NODELIST='gpub[007,021-024,037,050,052-054,066,073-075,078,091]' +# SLURM_NODE_ALIASES='(null)' +# SLURM_OPEN_MODE=a +# SLURM_PRIO_PROCESS=0 +# SLURM_PROCID=0 +# SLURM_SUBMIT_DIR=/scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1 +# SLURM_SUBMIT_HOST=dt-login02.delta.internal.ncsa.edu +# SLURM_TASKS_PER_NODE='1(x16)' +# SLURM_TASK_PID=2197444 +# SLURM_TOPOLOGY_ADDR=ss00.ss09.gpub007 +# SLURM_TOPOLOGY_ADDR_PATTERN=switch.switch.node +# SLURM_WORKING_CLUSTER=delta:dt-sched:6817:9728:109 +# srun --export=ALL python3 -m espnet2.bin.s2t_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir 
exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_3952ebc2-2c32-401a-8bff-e4f73cae86d4
/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir 
exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_3952ebc2-2c32-401a-8bff-e4f73cae86d4 +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_3952ebc2-2c32-401a-8bff-e4f73cae86d4 +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file 
exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_3952ebc2-2c32-401a-8bff-e4f73cae86d4 +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_3952ebc2-2c32-401a-8bff-e4f73cae86d4 +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_3952ebc2-2c32-401a-8bff-e4f73cae86d4 +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file 
exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_3952ebc2-2c32-401a-8bff-e4f73cae86d4 +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_3952ebc2-2c32-401a-8bff-e4f73cae86d4 +ibuted true --dist_launcher slurm --dist_init_method 
file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_3952ebc2-2c32-401a-8bff-e4f73cae86d4 +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_3952ebc2-2c32-401a-8bff-e4f73cae86d4 +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 
--multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_3952ebc2-2c32-401a-8bff-e4f73cae86d4 +ibuted true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_3952ebc2-2c32-401a-8bff-e4f73cae86d4 +d_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distrd_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file 
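Note on the command above: each --train_data_path_and_name_and_type / --valid_data_path_and_name_and_type flag packs three comma-separated fields (a file path, the batch key it feeds such as speech, text, text_prev, or text_ctc, and a loader type such as kaldi_ark or text), and each is paired with a matching --*_shape_file. A minimal sketch of that convention; parse_data_triplet is a hypothetical helper for illustration, not ESPnet code:

```python
# Hypothetical helper illustrating the "path,name,type" flag convention
# used by espnet2.bin.s2t_train above; not part of ESPnet itself.
def parse_data_triplet(arg: str) -> dict:
    path, name, dtype = arg.split(",")
    return {"path": path, "name": name, "type": dtype}

# The validation speech input from the command above:
print(parse_data_triplet("dump/raw/dev/wav.scp,speech,kaldi_ark"))
# {'path': 'dump/raw/dev/wav.scp', 'name': 'speech', 'type': 'kaldi_ark'}
```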
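The INFO lines that follow record the distributed setup: --dist_init_method points every rank at a shared .dist_init_* file, and with 16 nodes at --ngpu 4 each, 64 processes rendezvous there; PyTorch 1.13 logs a store-based barrier from distributed_c10d while the ranks check in. A minimal sketch of that file-based rendezvous, assuming a shared filesystem; the path and the rank derivation are illustrative, not ESPnet's launcher code:

```python
# Minimal sketch of a file:// rendezvous like the one in --dist_init_method.
# The init file path is illustrative; rank handling in ESPnet's
# multiprocessing_distributed launcher is more involved than this.
import os
import torch.distributed as dist

rank = int(os.environ.get("SLURM_PROCID", "0"))  # one rank per process
dist.init_process_group(
    backend="nccl",
    init_method="file:///shared/scratch/.dist_init_example",
    world_size=64,  # 16 nodes x 4 GPUs, matching the log below
    rank=rank,
)
# In torch 1.13, init_process_group runs the store-based barrier that
# produces the "store_based_barrier_key" INFO lines below.
```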
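One dimension in the model structure printed below is worth decoding: the Linear(in_features=19456) inside Conv2dSubsampling is 1024 channels times the 19 frequency bins that remain after two unpadded kernel-3, stride-2 convolutions are applied to the 80 log-mel bins. A quick check:

```python
# Verify in_features=19456 for the Conv2dSubsampling output projection below:
# two Conv2d(kernel_size=3, stride=2) layers shrink 80 mel bins to 19, and
# 1024 channels * 19 bins = 19456.
def conv_out(n: int, kernel: int = 3, stride: int = 2) -> int:
    return (n - kernel) // stride + 1  # unpadded conv output length

freq_bins = conv_out(conv_out(80))  # 80 -> 39 -> 19
print(freq_bins, 1024 * freq_bins)  # 19 19456
```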
+[gpub007:0/64] 2023-07-10 05:14:04,885 (distributed_c10d:319) INFO: Added key: store_based_barrier_key:1 to store for rank: 0
+[gpub007:0/64] 2023-07-10 05:14:06,820 (distributed_c10d:353) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 64 nodes.
+[gpub007:0/64] 2023-07-10 05:14:06,849 (s2t:483) INFO: Vocabulary size: 50002
+[gpub007:0/64] 2023-07-10 05:14:20,881 (abs_task:1201) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True
+[gpub007:0/64] 2023-07-10 05:14:20,890 (abs_task:1202) INFO: Model structure:
+ESPnetS2TModel( + (frontend): DefaultFrontend( + (stft): Stft(n_fft=512, win_length=400, hop_length=160, center=True, normalized=False, onesided=True) + (frontend): Frontend() + (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False) + ) + (specaug): SpecAug( + (freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq) + (time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=10, axis=time) + ) + (normalize): GlobalMVN(stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz, norm_means=True, norm_vars=True) + (encoder): TransformerEncoder( + (embed): Conv2dSubsampling( + (conv): Sequential( + (0): Conv2d(1, 1024, kernel_size=(3, 3), stride=(2, 2)) + (1): ReLU() + (2): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(2, 2)) + (3): ReLU() + ) + (out): Sequential( + (0): Linear(in_features=19456, out_features=1024, bias=True) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + ) + (encoders): MultiSequential( + (0): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) +
(feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), 
eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (9): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (10): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (11): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, 
bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (12): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (13): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (14): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (15): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, 
inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (16): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (17): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (18): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (19): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (20): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, 
bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (21): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (22): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (23): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + ) + (decoder): TransformerDecoder( + (embed): Sequential( + (0): Embedding(50002, 1024) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (output_layer): Linear(in_features=1024, out_features=50002, bias=True) + (decoders): MultiSequential( + (0): DecoderLayer( + (self_attn): 
MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): 
LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + 
(norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) 
+ (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (9): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (10): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (11): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): 
Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (12): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (13): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (14): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): 
Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (15): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (16): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (17): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): 
Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (18): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (19): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (20): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): 
Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (21): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (22): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (23): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): 
Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + ) + (criterion_att): LabelSmoothingLoss( + (criterion): KLDivLoss() + ) + (ctc): CTC( + (ctc_lo): Linear(in_features=1024, out_features=50002, bias=True) + (ctc_loss): CTCLoss() + ) +) + +Model summary: + Class Name: ESPnetS2TModel + Total Number of model parameters: 888.51 M + Number of trainable parameters: 888.51 M (100.0%) + Size: 3.55 GB + Type: torch.float32 +[gpub007:0/64] 2023-07-10 05:14:20,890 (abs_task:1205) INFO: Optimizer: +AdamW ( +Parameter Group 0 + amsgrad: False + betas: [0.9, 0.98] + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 0.00025 + lr: 2.5e-08 + maximize: False + weight_decay: 0.0 +) +[gpub007:0/64] 2023-07-10 05:14:20,890 (abs_task:1206) INFO: Scheduler: WarmupLR(warmup_steps=10000) +[gpub007:0/64] 2023-07-10 05:14:20,903 (abs_task:1215) INFO: Saving the configuration in exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/config.yaml +[gpub007:0/64] 2023-07-10 05:14:21,611 (abs_task:1272) INFO: Loading pretrained params from /scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v2/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e18_d18_lr5e-4_warmup20k_raw_bpe50000/valid.acc.ave.pth +[gpub007:0/64] 2023-07-10 05:14:30,511 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 05:14:30,719 (abs_task:1570) INFO: [valid] dataset: +ESPnetDataset( + speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"} + text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"} + text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"} + text: {"path": "dump/raw/dev/text", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 05:14:30,719 (abs_task:1571) INFO: [valid] Batch sampler: UnsortedBatchSampler(N-batch=1012, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape, +[gpub007:0/64] 2023-07-10 05:14:30,788 (abs_task:1572) INFO: [valid] mini-batch sizes summary: N-batch=1012, mean=128.1, min=128, max=129 +[gpub007:0/64] 2023-07-10 05:14:31,269 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 05:14:31,592 (abs_task:1570) INFO: [plot_att] dataset: +ESPnetDataset( + speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"} + text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"} + text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"} + text: 
{"path": "dump/raw/dev/text", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 05:14:31,592 (abs_task:1571) INFO: [plot_att] Batch sampler: UnsortedBatchSampler(N-batch=129591, batch_size=1, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape, +[gpub007:0/64] 2023-07-10 05:14:31,592 (abs_task:1572) INFO: [plot_att] mini-batch sizes summary: N-batch=3, mean=1.0, min=1, max=1 +[gpub007:0/64] 2023-07-10 05:15:01,949 (trainer:159) INFO: The training was resumed using exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/checkpoint.pth +gpub007:2197523:2197523 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.107<0> +gpub007:2197523:2197523 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub007:2197523:2197523 [0] NCCL INFO cudaDriverVersion 12010 +NCCL version 2.14.3+cuda11.7 +[gpub007:0/64] 2023-07-10 05:15:07,242 (trainer:284) INFO: 31/50epoch started +[gpub007:0/64] 2023-07-10 05:15:07,288 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpub007:0/64] 2023-07-10 05:15:24,567 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 05:15:27,864 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 05:15:27,864 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub007:0/64] 2023-07-10 05:15:27,870 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +gpub053:1853280:1853280 [2] NCCL INFO cudaDriverVersion 12010 +gpub053:1853280:1853280 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.153<0> +gpub053:1853280:1853280 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub053:1853280:1853407 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.153<0> +gpub053:1853280:1853407 [2] NCCL INFO Using network IB +gpub053:1853280:1853407 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub053:1853280:1853407 [2] NCCL INFO Trees [0] 35/-1/-1->34->33 [1] 35/-1/-1->34->33 +gpub053:1853280:1853407 [2] NCCL INFO Channel 00/0 : 34[85000] -> 35[c7000] via P2P/IPC +gpub053:1853280:1853407 [2] NCCL INFO Channel 01/0 : 34[85000] -> 35[c7000] via P2P/IPC +gpub053:1853280:1853407 [2] NCCL INFO Connected all rings +gpub053:1853280:1853407 [2] NCCL INFO Channel 00/0 : 34[85000] -> 33[46000] via P2P/IPC +gpub053:1853280:1853407 [2] NCCL INFO Channel 01/0 : 34[85000] -> 33[46000] via P2P/IPC +gpub053:1853280:1853407 [2] NCCL INFO Connected all trees +gpub053:1853280:1853407 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub053:1853280:1853407 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub053:1853280:1853407 [2] NCCL INFO comm 0x9f94540 rank 34 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub053:1853278:1853278 [0] NCCL INFO cudaDriverVersion 12010 +gpub053:1853278:1853278 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.153<0> +gpub053:1853278:1853278 [0] NCCL INFO 
NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub053:1853278:1853410 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.153<0> +gpub053:1853278:1853410 [0] NCCL INFO Using network IB +gpub053:1853278:1853410 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub053:1853278:1853410 [0] NCCL INFO Trees [0] 33/48/-1->32->0 [1] 33/-1/-1->32->36 +gpub053:1853278:1853410 [0] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0 +gpub053:1853278:1853410 [0] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0 +gpub053:1853278:1853410 [0] NCCL INFO Channel 00/0 : 32[7000] -> 33[46000] via P2P/IPC +gpub053:1853278:1853410 [0] NCCL INFO Channel 01/0 : 32[7000] -> 33[46000] via P2P/IPC +gpub053:1853278:1853410 [0] NCCL INFO Connected all rings +gpub053:1853278:1853410 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [send] via NET/IB/0 +gpub053:1853278:1853410 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [send] via NET/IB/0 +gpub053:1853278:1853410 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [receive] via NET/IB/0 +gpub053:1853278:1853410 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [send] via NET/IB/0 +gpub053:1853278:1853410 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [receive] via NET/IB/0 +gpub053:1853278:1853410 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [receive] via NET/IB/0 +gpub053:1853278:1853410 [0] NCCL INFO Connected all trees +gpub053:1853278:1853410 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub053:1853278:1853410 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub053:1853278:1853410 [0] NCCL INFO comm 0x1b893b10 rank 32 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub075:1498544:1498544 [1] NCCL INFO cudaDriverVersion 12010 +gpub075:1498544:1498544 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.175<0> +gpub075:1498544:1498544 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub075:1498544:1498668 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.175<0> +gpub075:1498544:1498668 [1] NCCL INFO Using network IB +gpub075:1498544:1498668 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub075:1498544:1498668 [1] NCCL INFO Trees [0] 54/-1/-1->53->52 [1] 54/56/-1->53->52 +gpub075:1498544:1498668 [1] NCCL INFO Channel 00/0 : 53[46000] -> 54[85000] via P2P/IPC +gpub075:1498544:1498668 [1] NCCL INFO Channel 01/0 : 53[46000] -> 54[85000] via P2P/IPC +gpub075:1498544:1498668 [1] NCCL INFO Connected all rings +gpub075:1498544:1498668 [1] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [send] via NET/IB/0 +gpub075:1498544:1498668 [1] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [receive] via NET/IB/0 +gpub075:1498544:1498668 [1] NCCL INFO Channel 00/0 : 53[46000] -> 52[7000] via P2P/IPC +gpub075:1498544:1498668 [1] NCCL INFO Channel 01/0 : 53[46000] -> 52[7000] via P2P/IPC +gpub075:1498544:1498668 [1] NCCL INFO Connected all trees +gpub075:1498544:1498668 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub075:1498544:1498668 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub075:1498544:1498668 [1] NCCL INFO comm 0x50791d40 rank 53 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub053:1853279:1853279 [1] NCCL INFO cudaDriverVersion 12010 +gpub053:1853279:1853279 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.153<0> +gpub053:1853279:1853279 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using 
internal implementation +gpub053:1853279:1853408 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.153<0> +gpub053:1853279:1853408 [1] NCCL INFO Using network IB +gpub053:1853279:1853408 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub053:1853279:1853408 [1] NCCL INFO Trees [0] 34/16/-1->33->32 [1] 34/-1/-1->33->32 +gpub053:1853279:1853408 [1] NCCL INFO Channel 00/0 : 33[46000] -> 34[85000] via P2P/IPC +gpub053:1853279:1853408 [1] NCCL INFO Channel 01/0 : 33[46000] -> 34[85000] via P2P/IPC +gpub053:1853279:1853408 [1] NCCL INFO Connected all rings +gpub053:1853279:1853408 [1] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [receive] via NET/IB/0 +gpub053:1853279:1853408 [1] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [send] via NET/IB/0 +gpub053:1853279:1853408 [1] NCCL INFO Channel 00/0 : 33[46000] -> 32[7000] via P2P/IPC +gpub053:1853279:1853408 [1] NCCL INFO Channel 01/0 : 33[46000] -> 32[7000] via P2P/IPC +gpub053:1853279:1853408 [1] NCCL INFO Connected all trees +gpub053:1853279:1853408 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub053:1853279:1853408 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub053:1853279:1853408 [1] NCCL INFO comm 0x8cc80f50 rank 33 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub022:3643869:3643869 [0] NCCL INFO cudaDriverVersion 12010 +gpub022:3643869:3643869 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.122<0> +gpub022:3643869:3643869 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub022:3643869:3644003 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.122<0> +gpub022:3643869:3644003 [0] NCCL INFO Using network IB +gpub022:3643869:3644003 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub022:3643869:3644003 [0] NCCL INFO Trees [0] 9/12/-1->8->17 [1] 9/-1/-1->8->5 +gpub022:3643869:3644003 [0] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0 +gpub022:3643869:3644003 [0] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0 +gpub022:3643869:3644003 [0] NCCL INFO Channel 00/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub022:3643869:3644003 [0] NCCL INFO Channel 01/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub022:3643869:3644003 [0] NCCL INFO Connected all rings +gpub022:3643869:3644003 [0] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [receive] via NET/IB/0 +gpub022:3643869:3644003 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [send] via NET/IB/0 +gpub022:3643869:3644003 [0] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [send] via NET/IB/0 +gpub022:3643869:3644003 [0] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [receive] via NET/IB/0 +gpub022:3643869:3644003 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [receive] via NET/IB/0 +gpub022:3643869:3644003 [0] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [send] via NET/IB/0 +gpub022:3643869:3644003 [0] NCCL INFO Connected all trees +gpub022:3643869:3644003 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub022:3643869:3644003 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub022:3643869:3644003 [0] NCCL INFO comm 0xb08c9310 rank 8 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub022:3643870:3643870 [1] NCCL INFO cudaDriverVersion 12010 +gpub022:3643870:3643870 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.122<0> +gpub022:3643870:3643870 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub022:3643870:3644001 [1] NCCL INFO NET/IB : 
Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.122<0> +gpub022:3643870:3644001 [1] NCCL INFO Using network IB +gpub022:3643870:3644001 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub022:3643870:3644001 [1] NCCL INFO Trees [0] 10/4/-1->9->8 [1] 10/-1/-1->9->8 +gpub022:3643870:3644001 [1] NCCL INFO Channel 00/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub022:3643870:3644001 [1] NCCL INFO Channel 01/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub022:3643870:3644001 [1] NCCL INFO Connected all rings +gpub022:3643870:3644001 [1] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [receive] via NET/IB/0 +gpub022:3643870:3644001 [1] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [send] via NET/IB/0 +gpub022:3643870:3644001 [1] NCCL INFO Channel 00/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub022:3643870:3644001 [1] NCCL INFO Channel 01/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub022:3643870:3644001 [1] NCCL INFO Connected all trees +gpub022:3643870:3644001 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub022:3643870:3644001 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub022:3643870:3644001 [1] NCCL INFO comm 0x97b1940 rank 9 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub022:3643871:3643871 [2] NCCL INFO cudaDriverVersion 12010 +gpub022:3643871:3643871 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.122<0> +gpub022:3643871:3643871 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub022:3643871:3644002 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.122<0> +gpub022:3643871:3644002 [2] NCCL INFO Using network IB +gpub022:3643871:3644002 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub022:3643871:3644002 [2] NCCL INFO Trees [0] 11/-1/-1->10->9 [1] 11/-1/-1->10->9 +gpub022:3643871:3644002 [2] NCCL INFO Channel 00/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub022:3643871:3644002 [2] NCCL INFO Channel 01/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub022:3643871:3644002 [2] NCCL INFO Connected all rings +gpub022:3643871:3644002 [2] NCCL INFO Channel 00/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub022:3643871:3644002 [2] NCCL INFO Channel 01/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub022:3643871:3644002 [2] NCCL INFO Connected all trees +gpub022:3643871:3644002 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub022:3643871:3644002 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub022:3643871:3644002 [2] NCCL INFO comm 0x50bbe140 rank 10 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub075:1498543:1498543 [0] NCCL INFO cudaDriverVersion 12010 +gpub075:1498543:1498543 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.175<0> +gpub075:1498543:1498543 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub075:1498543:1498669 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.175<0> +gpub075:1498543:1498669 [0] NCCL INFO Using network IB +gpub075:1498543:1498669 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub075:1498543:1498669 [0] NCCL INFO Trees [0] 53/-1/-1->52->57 [1] 53/48/-1->52->45 +gpub075:1498543:1498669 [0] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0 +gpub075:1498543:1498669 [0] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0 +gpub075:1498543:1498669 [0] NCCL INFO Channel 00/0 : 52[7000] -> 53[46000] via P2P/IPC +gpub075:1498543:1498669 [0] NCCL INFO Channel 01/0 : 52[7000] -> 53[46000] via P2P/IPC 
+gpub075:1498543:1498669 [0] NCCL INFO Connected all rings +gpub075:1498543:1498669 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [receive] via NET/IB/0 +gpub075:1498543:1498669 [0] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [send] via NET/IB/0 +gpub075:1498543:1498669 [0] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [receive] via NET/IB/0 +gpub075:1498543:1498669 [0] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [send] via NET/IB/0 +gpub075:1498543:1498669 [0] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [receive] via NET/IB/0 +gpub075:1498543:1498669 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [send] via NET/IB/0 +gpub075:1498543:1498669 [0] NCCL INFO Connected all trees +gpub075:1498543:1498669 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub075:1498543:1498669 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub075:1498543:1498669 [0] NCCL INFO comm 0x50bdf1e0 rank 52 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub075:1498546:1498546 [3] NCCL INFO cudaDriverVersion 12010 +gpub075:1498546:1498546 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.175<0> +gpub075:1498546:1498546 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub075:1498546:1498670 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.175<0> +gpub075:1498546:1498670 [3] NCCL INFO Using network IB +gpub075:1498546:1498670 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub075:1498546:1498670 [3] NCCL INFO Trees [0] -1/-1/-1->55->54 [1] -1/-1/-1->55->54 +gpub075:1498546:1498670 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0 +gpub075:1498546:1498670 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0 +gpub075:1498546:1498670 [3] NCCL INFO Connected all rings +gpub075:1498546:1498670 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 54[85000] via P2P/IPC +gpub075:1498546:1498670 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 54[85000] via P2P/IPC +gpub075:1498546:1498670 [3] NCCL INFO Connected all trees +gpub075:1498546:1498670 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub075:1498546:1498670 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub075:1498546:1498670 [3] NCCL INFO comm 0x93fedd0 rank 55 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub022:3643872:3643872 [3] NCCL INFO cudaDriverVersion 12010 +gpub022:3643872:3643872 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.122<0> +gpub022:3643872:3643872 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub022:3643872:3644000 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.122<0> +gpub022:3643872:3644000 [3] NCCL INFO Using network IB +gpub022:3643872:3644000 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub022:3643872:3644000 [3] NCCL INFO Trees [0] -1/-1/-1->11->10 [1] -1/-1/-1->11->10 +gpub022:3643872:3644000 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0 +gpub022:3643872:3644000 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0 +gpub022:3643872:3644000 [3] NCCL INFO Connected all rings +gpub022:3643872:3644000 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 10[85000] via P2P/IPC +gpub022:3643872:3644000 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 10[85000] via P2P/IPC +gpub022:3643872:3644000 [3] NCCL INFO Connected all trees +gpub022:3643872:3644000 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub022:3643872:3644000 [3] NCCL INFO 2 coll 
channels, 2 p2p channels, 2 p2p channels per peer +gpub022:3643872:3644000 [3] NCCL INFO comm 0x503e4a90 rank 11 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub053:1853281:1853281 [3] NCCL INFO cudaDriverVersion 12010 +gpub053:1853281:1853281 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.153<0> +gpub053:1853281:1853281 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub053:1853281:1853409 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.153<0> +gpub053:1853281:1853409 [3] NCCL INFO Using network IB +gpub053:1853281:1853409 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub053:1853281:1853409 [3] NCCL INFO Trees [0] -1/-1/-1->35->34 [1] -1/-1/-1->35->34 +gpub053:1853281:1853409 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0 +gpub053:1853281:1853409 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0 +gpub053:1853281:1853409 [3] NCCL INFO Connected all rings +gpub053:1853281:1853409 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 34[85000] via P2P/IPC +gpub053:1853281:1853409 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 34[85000] via P2P/IPC +gpub053:1853281:1853409 [3] NCCL INFO Connected all trees +gpub053:1853281:1853409 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub053:1853281:1853409 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub053:1853281:1853409 [3] NCCL INFO comm 0x9dd1ec40 rank 35 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub074:3674023:3674023 [1] NCCL INFO cudaDriverVersion 12010 +gpub074:3674023:3674023 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0> +gpub074:3674023:3674023 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub074:3674023:3674154 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0> +gpub074:3674023:3674154 [1] NCCL INFO Using network IB +gpub074:3674023:3674154 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub074:3674023:3674154 [1] NCCL INFO Trees [0] 50/40/-1->49->48 [1] 50/-1/-1->49->48 +gpub074:3674023:3674154 [1] NCCL INFO Channel 00/0 : 49[46000] -> 50[85000] via P2P/IPC +gpub074:3674023:3674154 [1] NCCL INFO Channel 01/0 : 49[46000] -> 50[85000] via P2P/IPC +gpub074:3674023:3674154 [1] NCCL INFO Connected all rings +gpub074:3674023:3674154 [1] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [receive] via NET/IB/0 +gpub074:3674023:3674154 [1] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [send] via NET/IB/0 +gpub074:3674023:3674154 [1] NCCL INFO Channel 00/0 : 49[46000] -> 48[7000] via P2P/IPC +gpub074:3674023:3674154 [1] NCCL INFO Channel 01/0 : 49[46000] -> 48[7000] via P2P/IPC +gpub074:3674023:3674154 [1] NCCL INFO Connected all trees +gpub074:3674023:3674154 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub074:3674023:3674154 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub074:3674023:3674154 [1] NCCL INFO comm 0x8b2ea8e0 rank 49 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub074:3674025:3674025 [3] NCCL INFO cudaDriverVersion 12010 +gpub074:3674025:3674025 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0> +gpub074:3674025:3674025 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub074:3674025:3674155 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0> +gpub074:3674025:3674155 [3] NCCL INFO Using network IB +gpub074:3674025:3674155 [3] NCCL INFO Setting affinity for GPU 3 to 
ffff +gpub074:3674025:3674155 [3] NCCL INFO Trees [0] -1/-1/-1->51->50 [1] -1/-1/-1->51->50 +gpub074:3674025:3674155 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0 +gpub074:3674025:3674155 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0 +gpub074:3674025:3674155 [3] NCCL INFO Connected all rings +gpub074:3674025:3674155 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 50[85000] via P2P/IPC +gpub074:3674025:3674155 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 50[85000] via P2P/IPC +gpub074:3674025:3674155 [3] NCCL INFO Connected all trees +gpub074:3674025:3674155 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub074:3674025:3674155 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub074:3674025:3674155 [3] NCCL INFO comm 0x8bfc7020 rank 51 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub074:3674022:3674022 [0] NCCL INFO cudaDriverVersion 12010 +gpub074:3674022:3674022 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0> +gpub074:3674022:3674022 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub074:3674022:3674152 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0> +gpub074:3674022:3674152 [0] NCCL INFO Using network IB +gpub074:3674022:3674152 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub074:3674022:3674152 [0] NCCL INFO Trees [0] 49/56/-1->48->32 [1] 49/-1/-1->48->52 +gpub074:3674022:3674152 [0] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0 +gpub074:3674022:3674152 [0] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0 +gpub074:3674022:3674152 [0] NCCL INFO Channel 00/0 : 48[7000] -> 49[46000] via P2P/IPC +gpub074:3674022:3674152 [0] NCCL INFO Channel 01/0 : 48[7000] -> 49[46000] via P2P/IPC +gpub074:3674022:3674152 [0] NCCL INFO Connected all rings +gpub074:3674022:3674152 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [send] via NET/IB/0 +gpub074:3674022:3674152 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [send] via NET/IB/0 +gpub074:3674022:3674152 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [receive] via NET/IB/0 +gpub074:3674022:3674152 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [send] via NET/IB/0 +gpub074:3674022:3674152 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [receive] via NET/IB/0 +gpub074:3674022:3674152 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [receive] via NET/IB/0 +gpub074:3674022:3674152 [0] NCCL INFO Connected all trees +gpub074:3674022:3674152 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub074:3674022:3674152 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub074:3674022:3674152 [0] NCCL INFO comm 0x50173c20 rank 48 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub074:3674024:3674024 [2] NCCL INFO cudaDriverVersion 12010 +gpub074:3674024:3674024 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0> +gpub074:3674024:3674024 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub074:3674024:3674153 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0> +gpub074:3674024:3674153 [2] NCCL INFO Using network IB +gpub074:3674024:3674153 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub074:3674024:3674153 [2] NCCL INFO Trees [0] 51/-1/-1->50->49 [1] 51/-1/-1->50->49 +gpub074:3674024:3674153 [2] NCCL INFO Channel 00/0 : 50[85000] -> 51[c7000] via P2P/IPC +gpub074:3674024:3674153 [2] NCCL INFO Channel 01/0 : 
50[85000] -> 51[c7000] via P2P/IPC +gpub074:3674024:3674153 [2] NCCL INFO Connected all rings +gpub074:3674024:3674153 [2] NCCL INFO Channel 00/0 : 50[85000] -> 49[46000] via P2P/IPC +gpub074:3674024:3674153 [2] NCCL INFO Channel 01/0 : 50[85000] -> 49[46000] via P2P/IPC +gpub074:3674024:3674153 [2] NCCL INFO Connected all trees +gpub074:3674024:3674153 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub074:3674024:3674153 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub074:3674024:3674153 [2] NCCL INFO comm 0x4fe75600 rank 50 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub075:1498545:1498545 [2] NCCL INFO cudaDriverVersion 12010 +gpub075:1498545:1498545 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.175<0> +gpub075:1498545:1498545 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub075:1498545:1498671 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.175<0> +gpub075:1498545:1498671 [2] NCCL INFO Using network IB +gpub075:1498545:1498671 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub075:1498545:1498671 [2] NCCL INFO Trees [0] 55/-1/-1->54->53 [1] 55/-1/-1->54->53 +gpub075:1498545:1498671 [2] NCCL INFO Channel 00/0 : 54[85000] -> 55[c7000] via P2P/IPC +gpub075:1498545:1498671 [2] NCCL INFO Channel 01/0 : 54[85000] -> 55[c7000] via P2P/IPC +gpub075:1498545:1498671 [2] NCCL INFO Connected all rings +gpub075:1498545:1498671 [2] NCCL INFO Channel 00/0 : 54[85000] -> 53[46000] via P2P/IPC +gpub075:1498545:1498671 [2] NCCL INFO Channel 01/0 : 54[85000] -> 53[46000] via P2P/IPC +gpub075:1498545:1498671 [2] NCCL INFO Connected all trees +gpub075:1498545:1498671 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub075:1498545:1498671 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub075:1498545:1498671 [2] NCCL INFO comm 0x500cd5c0 rank 54 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub078:204478:204478 [3] NCCL INFO cudaDriverVersion 12010 +gpub078:204478:204478 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.178<0> +gpub078:204478:204478 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub078:204478:204627 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.178<0> +gpub078:204478:204627 [3] NCCL INFO Using network IB +gpub078:204478:204627 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub078:204478:204627 [3] NCCL INFO Trees [0] -1/-1/-1->59->58 [1] -1/-1/-1->59->58 +gpub078:204478:204627 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0 +gpub078:204478:204627 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0 +gpub078:204478:204627 [3] NCCL INFO Connected all rings +gpub078:204478:204627 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 58[85000] via P2P/IPC +gpub078:204478:204627 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 58[85000] via P2P/IPC +gpub078:204478:204627 [3] NCCL INFO Connected all trees +gpub078:204478:204627 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub078:204478:204627 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub078:204478:204627 [3] NCCL INFO comm 0x4fdeca90 rank 59 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub078:204477:204477 [2] NCCL INFO cudaDriverVersion 12010 +gpub078:204477:204477 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.178<0> +gpub078:204477:204477 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation 
+gpub078:204477:204628 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.178<0> +gpub078:204477:204628 [2] NCCL INFO Using network IB +gpub078:204477:204628 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub078:204477:204628 [2] NCCL INFO Trees [0] 59/-1/-1->58->57 [1] 59/-1/-1->58->57 +gpub078:204477:204628 [2] NCCL INFO Channel 00/0 : 58[85000] -> 59[c7000] via P2P/IPC +gpub078:204477:204628 [2] NCCL INFO Channel 01/0 : 58[85000] -> 59[c7000] via P2P/IPC +gpub078:204477:204628 [2] NCCL INFO Connected all rings +gpub078:204477:204628 [2] NCCL INFO Channel 00/0 : 58[85000] -> 57[46000] via P2P/IPC +gpub078:204477:204628 [2] NCCL INFO Channel 01/0 : 58[85000] -> 57[46000] via P2P/IPC +gpub078:204477:204628 [2] NCCL INFO Connected all trees +gpub078:204477:204628 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub078:204477:204628 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub078:204477:204628 [2] NCCL INFO comm 0x950be50 rank 58 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub021:390566:390566 [3] NCCL INFO cudaDriverVersion 12010 +gpub021:390566:390566 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.121<0> +gpub021:390566:390566 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub021:390566:390693 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.121<0> +gpub021:390566:390693 [3] NCCL INFO Using network IB +gpub021:390566:390693 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub021:390566:390693 [3] NCCL INFO Trees [0] -1/-1/-1->7->6 [1] -1/-1/-1->7->6 +gpub021:390566:390693 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0 +gpub021:390566:390693 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0 +gpub021:390566:390693 [3] NCCL INFO Connected all rings +gpub021:390566:390693 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 6[85000] via P2P/IPC +gpub021:390566:390693 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 6[85000] via P2P/IPC +gpub021:390566:390693 [3] NCCL INFO Connected all trees +gpub021:390566:390693 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub021:390566:390693 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub021:390566:390693 [3] NCCL INFO comm 0x51469130 rank 7 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub054:3656063:3656063 [1] NCCL INFO cudaDriverVersion 12010 +gpub054:3656063:3656063 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.154<0> +gpub054:3656063:3656063 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub054:3656063:3656192 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.154<0> +gpub054:3656063:3656192 [1] NCCL INFO Using network IB +gpub054:3656063:3656192 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub054:3656063:3656192 [1] NCCL INFO Trees [0] 38/-1/-1->37->36 [1] 38/40/-1->37->36 +gpub054:3656063:3656192 [1] NCCL INFO Channel 00/0 : 37[46000] -> 38[85000] via P2P/IPC +gpub054:3656063:3656192 [1] NCCL INFO Channel 01/0 : 37[46000] -> 38[85000] via P2P/IPC +gpub054:3656063:3656192 [1] NCCL INFO Connected all rings +gpub054:3656063:3656192 [1] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [send] via NET/IB/0 +gpub054:3656063:3656192 [1] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [receive] via NET/IB/0 +gpub054:3656063:3656192 [1] NCCL INFO Channel 00/0 : 37[46000] -> 36[7000] via P2P/IPC +gpub054:3656063:3656192 [1] NCCL INFO Channel 01/0 : 37[46000] -> 
36[7000] via P2P/IPC +gpub054:3656063:3656192 [1] NCCL INFO Connected all trees +gpub054:3656063:3656192 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub054:3656063:3656192 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub054:3656063:3656192 [1] NCCL INFO comm 0x50da24d0 rank 37 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub021:390563:390563 [0] NCCL INFO cudaDriverVersion 12010 +gpub021:390563:390563 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.121<0> +gpub021:390563:390563 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub021:390563:390695 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.121<0> +gpub021:390563:390695 [0] NCCL INFO Using network IB +gpub021:390563:390695 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub021:390563:390695 [0] NCCL INFO Trees [0] 5/-1/-1->4->9 [1] 5/0/-1->4->12 +gpub021:390563:390695 [0] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0 +gpub021:390563:390695 [0] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0 +gpub021:390563:390695 [0] NCCL INFO Channel 00/0 : 4[7000] -> 5[46000] via P2P/IPC +gpub021:390563:390695 [0] NCCL INFO Channel 01/0 : 4[7000] -> 5[46000] via P2P/IPC +gpub021:390563:390695 [0] NCCL INFO Connected all rings +gpub054:3656064:3656064 [2] NCCL INFO cudaDriverVersion 12010 +gpub054:3656064:3656064 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.154<0> +gpub054:3656064:3656064 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub054:3656064:3656191 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.154<0> +gpub054:3656064:3656191 [2] NCCL INFO Using network IB +gpub054:3656064:3656191 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub054:3656064:3656191 [2] NCCL INFO Trees [0] 39/-1/-1->38->37 [1] 39/-1/-1->38->37 +gpub054:3656064:3656191 [2] NCCL INFO Channel 00/0 : 38[85000] -> 39[c7000] via P2P/IPC +gpub054:3656064:3656191 [2] NCCL INFO Channel 01/0 : 38[85000] -> 39[c7000] via P2P/IPC +gpub054:3656064:3656191 [2] NCCL INFO Connected all rings +gpub054:3656064:3656191 [2] NCCL INFO Channel 00/0 : 38[85000] -> 37[46000] via P2P/IPC +gpub054:3656064:3656191 [2] NCCL INFO Channel 01/0 : 38[85000] -> 37[46000] via P2P/IPC +gpub073:545753:545753 [3] NCCL INFO cudaDriverVersion 12010 +gpub073:545753:545753 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.173<0> +gpub073:545753:545753 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub073:545753:545878 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.173<0> +gpub073:545753:545878 [3] NCCL INFO Using network IB +gpub073:545753:545878 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub073:545753:545878 [3] NCCL INFO Trees [0] -1/-1/-1->47->46 [1] -1/-1/-1->47->46 +gpub073:545753:545878 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0 +gpub073:545753:545878 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0 +gpub073:545753:545878 [3] NCCL INFO Connected all rings +gpub073:545753:545878 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 46[85000] via P2P/IPC +gpub073:545753:545878 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 46[85000] via P2P/IPC +gpub021:390563:390695 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [receive] via NET/IB/0 +gpub021:390563:390695 [0] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [send] via NET/IB/0 +gpub021:390563:390695 [0] 
NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [send] via NET/IB/0 +gpub021:390563:390695 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [receive] via NET/IB/0 +gpub021:390563:390695 [0] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [receive] via NET/IB/0 +gpub021:390563:390695 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [send] via NET/IB/0 +gpub021:390563:390695 [0] NCCL INFO Connected all trees +gpub021:390563:390695 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub021:390563:390695 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub021:390563:390695 [0] NCCL INFO comm 0x51605390 rank 4 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub054:3656064:3656191 [2] NCCL INFO Connected all trees +gpub054:3656064:3656191 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub054:3656064:3656191 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub054:3656064:3656191 [2] NCCL INFO comm 0x8a8d4950 rank 38 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub073:545753:545878 [3] NCCL INFO Connected all trees +gpub073:545753:545878 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub073:545753:545878 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub073:545753:545878 [3] NCCL INFO comm 0x50602840 rank 47 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub021:390564:390564 [1] NCCL INFO cudaDriverVersion 12010 +gpub021:390564:390564 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.121<0> +gpub021:390564:390564 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub021:390564:390694 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.121<0> +gpub021:390564:390694 [1] NCCL INFO Using network IB +gpub021:390564:390694 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub021:390564:390694 [1] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 6/8/-1->5->4 +gpub021:390564:390694 [1] NCCL INFO Channel 00/0 : 5[46000] -> 6[85000] via P2P/IPC +gpub021:390564:390694 [1] NCCL INFO Channel 01/0 : 5[46000] -> 6[85000] via P2P/IPC +gpub021:390564:390694 [1] NCCL INFO Connected all rings +gpub021:390564:390694 [1] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [send] via NET/IB/0 +gpub021:390564:390694 [1] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [receive] via NET/IB/0 +gpub054:3656065:3656065 [3] NCCL INFO cudaDriverVersion 12010 +gpub054:3656065:3656065 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.154<0> +gpub054:3656065:3656065 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub054:3656065:3656193 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.154<0> +gpub054:3656065:3656193 [3] NCCL INFO Using network IB +gpub054:3656065:3656193 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub054:3656065:3656193 [3] NCCL INFO Trees [0] -1/-1/-1->39->38 [1] -1/-1/-1->39->38 +gpub054:3656065:3656193 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0 +gpub054:3656065:3656193 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0 +gpub054:3656065:3656193 [3] NCCL INFO Connected all rings +gpub054:3656065:3656193 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 38[85000] via P2P/IPC +gpub054:3656065:3656193 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 38[85000] via P2P/IPC +gpub021:390564:390694 [1] NCCL INFO Channel 00/0 : 5[46000] -> 4[7000] via P2P/IPC +gpub021:390564:390694 [1] NCCL INFO Channel 01/0 : 5[46000] -> 4[7000] via P2P/IPC 
+gpub021:390564:390694 [1] NCCL INFO Connected all trees +gpub021:390564:390694 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub021:390564:390694 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub021:390564:390694 [1] NCCL INFO comm 0x9e6cc8d0 rank 5 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub054:3656065:3656193 [3] NCCL INFO Connected all trees +gpub054:3656065:3656193 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub054:3656065:3656193 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub054:3656065:3656193 [3] NCCL INFO comm 0xb59a2f90 rank 39 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub037:1697458:1697458 [2] NCCL INFO cudaDriverVersion 12010 +gpub037:1697458:1697458 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.137<0> +gpub037:1697458:1697458 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub037:1697458:1697595 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.137<0> +gpub037:1697458:1697595 [2] NCCL INFO Using network IB +gpub037:1697458:1697595 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub037:1697458:1697595 [2] NCCL INFO Trees [0] 23/-1/-1->22->21 [1] 23/-1/-1->22->21 +gpub037:1697458:1697595 [2] NCCL INFO Channel 00/0 : 22[85000] -> 23[c7000] via P2P/IPC +gpub037:1697458:1697595 [2] NCCL INFO Channel 01/0 : 22[85000] -> 23[c7000] via P2P/IPC +gpub037:1697458:1697595 [2] NCCL INFO Connected all rings +gpub037:1697458:1697595 [2] NCCL INFO Channel 00/0 : 22[85000] -> 21[46000] via P2P/IPC +gpub037:1697458:1697595 [2] NCCL INFO Channel 01/0 : 22[85000] -> 21[46000] via P2P/IPC +gpub037:1697458:1697595 [2] NCCL INFO Connected all trees +gpub037:1697458:1697595 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub037:1697458:1697595 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub037:1697458:1697595 [2] NCCL INFO comm 0x505de110 rank 22 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub037:1697456:1697456 [0] NCCL INFO cudaDriverVersion 12010 +gpub037:1697456:1697456 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.137<0> +gpub037:1697456:1697456 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub037:1697456:1697592 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.137<0> +gpub037:1697456:1697592 [0] NCCL INFO Using network IB +gpub037:1697456:1697592 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub037:1697456:1697592 [0] NCCL INFO Trees [0] 21/-1/-1->20->25 [1] 21/16/-1->20->13 +gpub037:1697456:1697592 [0] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0 +gpub037:1697456:1697592 [0] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0 +gpub037:1697456:1697592 [0] NCCL INFO Channel 00/0 : 20[7000] -> 21[46000] via P2P/IPC +gpub037:1697456:1697592 [0] NCCL INFO Channel 01/0 : 20[7000] -> 21[46000] via P2P/IPC +gpub037:1697456:1697592 [0] NCCL INFO Connected all rings +gpub024:181797:181797 [3] NCCL INFO cudaDriverVersion 12010 +gpub024:181797:181797 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.124<0> +gpub024:181797:181797 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub024:181797:181928 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.124<0> +gpub024:181797:181928 [3] NCCL INFO Using network IB +gpub024:181797:181928 [3] NCCL INFO Setting affinity for GPU 3 to ffff 
+gpub024:181797:181928 [3] NCCL INFO Trees [0] -1/-1/-1->19->18 [1] -1/-1/-1->19->18 +gpub024:181797:181928 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0 +gpub024:181797:181928 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0 +gpub024:181797:181928 [3] NCCL INFO Connected all rings +gpub024:181797:181928 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 18[85000] via P2P/IPC +gpub024:181797:181928 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 18[85000] via P2P/IPC +gpub037:1697456:1697592 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [receive] via NET/IB/0 +gpub037:1697456:1697592 [0] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [send] via NET/IB/0 +gpub037:1697456:1697592 [0] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [receive] via NET/IB/0 +gpub037:1697456:1697592 [0] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [send] via NET/IB/0 +gpub037:1697456:1697592 [0] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [receive] via NET/IB/0 +gpub037:1697456:1697592 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [send] via NET/IB/0 +gpub037:1697456:1697592 [0] NCCL INFO Connected all trees +gpub037:1697456:1697592 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub037:1697456:1697592 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub037:1697456:1697592 [0] NCCL INFO comm 0x51a3c2e0 rank 20 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub024:181797:181928 [3] NCCL INFO Connected all trees +gpub024:181797:181928 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub024:181797:181928 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub024:181797:181928 [3] NCCL INFO comm 0x507c77a0 rank 19 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub050:2358256:2358256 [2] NCCL INFO cudaDriverVersion 12010 +gpub050:2358256:2358256 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.150<0> +gpub050:2358256:2358256 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub050:2358256:2358382 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.150<0> +gpub050:2358256:2358382 [2] NCCL INFO Using network IB +gpub050:2358256:2358382 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub050:2358256:2358382 [2] NCCL INFO Trees [0] 27/-1/-1->26->25 [1] 27/-1/-1->26->25 +gpub050:2358256:2358382 [2] NCCL INFO Channel 00/0 : 26[85000] -> 27[c7000] via P2P/IPC +gpub050:2358256:2358382 [2] NCCL INFO Channel 01/0 : 26[85000] -> 27[c7000] via P2P/IPC +gpub050:2358256:2358382 [2] NCCL INFO Connected all rings +gpub050:2358256:2358382 [2] NCCL INFO Channel 00/0 : 26[85000] -> 25[46000] via P2P/IPC +gpub050:2358256:2358382 [2] NCCL INFO Channel 01/0 : 26[85000] -> 25[46000] via P2P/IPC +gpub007:2197525:2197525 [2] NCCL INFO cudaDriverVersion 12010 +gpub007:2197525:2197525 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.107<0> +gpub007:2197525:2197525 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub007:2197525:2197660 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.107<0> +gpub007:2197525:2197660 [2] NCCL INFO Using network IB +gpub007:2197525:2197660 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub007:2197525:2197660 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1 +gpub007:2197525:2197660 [2] NCCL INFO Channel 00/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub007:2197525:2197660 [2] NCCL INFO Channel 01/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub007:2197525:2197660 
[2] NCCL INFO Connected all rings +gpub007:2197525:2197660 [2] NCCL INFO Channel 00/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub007:2197525:2197660 [2] NCCL INFO Channel 01/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub050:2358256:2358382 [2] NCCL INFO Connected all trees +gpub050:2358256:2358382 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub050:2358256:2358382 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub050:2358256:2358382 [2] NCCL INFO comm 0x5136dc80 rank 26 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub007:2197525:2197660 [2] NCCL INFO Connected all trees +gpub007:2197525:2197660 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub007:2197525:2197660 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub007:2197525:2197660 [2] NCCL INFO comm 0xb6001d10 rank 2 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub024:181794:181794 [0] NCCL INFO cudaDriverVersion 12010 +gpub024:181794:181794 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.124<0> +gpub024:181794:181794 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub024:181794:181927 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.124<0> +gpub024:181794:181927 [0] NCCL INFO Using network IB +gpub024:181794:181927 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub024:181794:181927 [0] NCCL INFO Trees [0] 17/24/-1->16->33 [1] 17/-1/-1->16->20 +gpub024:181794:181927 [0] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0 +gpub024:181794:181927 [0] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0 +gpub024:181794:181927 [0] NCCL INFO Channel 00/0 : 16[7000] -> 17[46000] via P2P/IPC +gpub024:181794:181927 [0] NCCL INFO Channel 01/0 : 16[7000] -> 17[46000] via P2P/IPC +gpub024:181794:181927 [0] NCCL INFO Connected all rings +gpub024:181794:181927 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [send] via NET/IB/0 +gpub024:181794:181927 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [send] via NET/IB/0 +gpub024:181794:181927 [0] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [send] via NET/IB/0 +gpub024:181794:181927 [0] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [receive] via NET/IB/0 +gpub024:181794:181927 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [receive] via NET/IB/0 +gpub024:181794:181927 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [receive] via NET/IB/0 +gpub024:181794:181927 [0] NCCL INFO Connected all trees +gpub024:181794:181927 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub024:181794:181927 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub024:181794:181927 [0] NCCL INFO comm 0xa37e3f90 rank 16 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub007:2197524:2197524 [1] NCCL INFO cudaDriverVersion 12010 +gpub007:2197524:2197524 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.107<0> +gpub007:2197524:2197524 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub007:2197524:2197661 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.107<0> +gpub007:2197524:2197661 [1] NCCL INFO Using network IB +gpub007:2197524:2197661 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub007:2197524:2197661 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0 +gpub007:2197524:2197661 [1] NCCL INFO Channel 00/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub007:2197524:2197661 [1] NCCL INFO Channel 01/0 : 1[46000] 
-> 2[85000] via P2P/IPC +gpub007:2197524:2197661 [1] NCCL INFO Connected all rings +gpub007:2197524:2197661 [1] NCCL INFO Channel 00/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub007:2197524:2197661 [1] NCCL INFO Channel 01/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub007:2197524:2197661 [1] NCCL INFO Connected all trees +gpub007:2197524:2197661 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub007:2197524:2197661 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub007:2197524:2197661 [1] NCCL INFO comm 0x8cce800 rank 1 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub050:2358255:2358255 [1] NCCL INFO cudaDriverVersion 12010 +gpub050:2358255:2358255 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.150<0> +gpub050:2358255:2358255 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub050:2358255:2358384 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.150<0> +gpub050:2358255:2358384 [1] NCCL INFO Using network IB +gpub050:2358255:2358384 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub050:2358255:2358384 [1] NCCL INFO Trees [0] 26/20/-1->25->24 [1] 26/-1/-1->25->24 +gpub050:2358255:2358384 [1] NCCL INFO Channel 00/0 : 25[46000] -> 26[85000] via P2P/IPC +gpub050:2358255:2358384 [1] NCCL INFO Channel 01/0 : 25[46000] -> 26[85000] via P2P/IPC +gpub050:2358255:2358384 [1] NCCL INFO Connected all rings +gpub050:2358255:2358384 [1] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [receive] via NET/IB/0 +gpub050:2358255:2358384 [1] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [send] via NET/IB/0 +gpub050:2358255:2358384 [1] NCCL INFO Channel 00/0 : 25[46000] -> 24[7000] via P2P/IPC +gpub050:2358255:2358384 [1] NCCL INFO Channel 01/0 : 25[46000] -> 24[7000] via P2P/IPC +gpub050:2358255:2358384 [1] NCCL INFO Connected all trees +gpub050:2358255:2358384 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub050:2358255:2358384 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub050:2358255:2358384 [1] NCCL INFO comm 0xb33f9a00 rank 25 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub050:2358254:2358254 [0] NCCL INFO cudaDriverVersion 12010 +gpub050:2358254:2358254 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.150<0> +gpub050:2358254:2358254 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub050:2358254:2358383 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.150<0> +gpub050:2358254:2358383 [0] NCCL INFO Using network IB +gpub050:2358254:2358383 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub050:2358254:2358383 [0] NCCL INFO Trees [0] 25/28/-1->24->16 [1] 25/-1/-1->24->21 +gpub050:2358254:2358383 [0] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0 +gpub050:2358254:2358383 [0] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0 +gpub050:2358254:2358383 [0] NCCL INFO Channel 00/0 : 24[7000] -> 25[46000] via P2P/IPC +gpub050:2358254:2358383 [0] NCCL INFO Channel 01/0 : 24[7000] -> 25[46000] via P2P/IPC +gpub050:2358254:2358383 [0] NCCL INFO Connected all rings +gpub050:2358254:2358383 [0] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [receive] via NET/IB/0 +gpub050:2358254:2358383 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [send] via NET/IB/0 +gpub050:2358254:2358383 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [receive] via NET/IB/0 +gpub050:2358254:2358383 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [send] via 
NET/IB/0 +gpub050:2358254:2358383 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [receive] via NET/IB/0 +gpub050:2358254:2358383 [0] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [send] via NET/IB/0 +gpub050:2358254:2358383 [0] NCCL INFO Connected all trees +gpub050:2358254:2358383 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub050:2358254:2358383 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub050:2358254:2358383 [0] NCCL INFO comm 0xab879950 rank 24 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub073:545751:545751 [1] NCCL INFO cudaDriverVersion 12010 +gpub073:545751:545751 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.173<0> +gpub073:545751:545751 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub073:545751:545879 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.173<0> +gpub073:545751:545879 [1] NCCL INFO Using network IB +gpub073:545751:545879 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub073:545751:545879 [1] NCCL INFO Trees [0] 46/-1/-1->45->44 [1] 46/52/-1->45->44 +gpub073:545751:545879 [1] NCCL INFO Channel 00/0 : 45[46000] -> 46[85000] via P2P/IPC +gpub073:545751:545879 [1] NCCL INFO Channel 01/0 : 45[46000] -> 46[85000] via P2P/IPC +gpub073:545751:545879 [1] NCCL INFO Connected all rings +gpub073:545751:545879 [1] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [send] via NET/IB/0 +gpub073:545751:545879 [1] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [receive] via NET/IB/0 +gpub052:2095168:2095168 [2] NCCL INFO cudaDriverVersion 12010 +gpub052:2095168:2095168 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.152<0> +gpub052:2095168:2095168 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub052:2095168:2095300 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.152<0> +gpub052:2095168:2095300 [2] NCCL INFO Using network IB +gpub052:2095168:2095300 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub052:2095168:2095300 [2] NCCL INFO Trees [0] 31/-1/-1->30->29 [1] 31/-1/-1->30->29 +gpub052:2095168:2095300 [2] NCCL INFO Channel 00/0 : 30[85000] -> 31[c7000] via P2P/IPC +gpub052:2095168:2095300 [2] NCCL INFO Channel 01/0 : 30[85000] -> 31[c7000] via P2P/IPC +gpub052:2095168:2095300 [2] NCCL INFO Connected all rings +gpub052:2095168:2095300 [2] NCCL INFO Channel 00/0 : 30[85000] -> 29[46000] via P2P/IPC +gpub052:2095168:2095300 [2] NCCL INFO Channel 01/0 : 30[85000] -> 29[46000] via P2P/IPC +gpub073:545751:545879 [1] NCCL INFO Channel 00/0 : 45[46000] -> 44[7000] via P2P/IPC +gpub073:545751:545879 [1] NCCL INFO Channel 01/0 : 45[46000] -> 44[7000] via P2P/IPC +gpub073:545751:545879 [1] NCCL INFO Connected all trees +gpub073:545751:545879 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub073:545751:545879 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub073:545751:545879 [1] NCCL INFO comm 0x5091fcf0 rank 45 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub052:2095168:2095300 [2] NCCL INFO Connected all trees +gpub052:2095168:2095300 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub052:2095168:2095300 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub052:2095168:2095300 [2] NCCL INFO comm 0x9dba4a0 rank 30 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub024:181796:181796 [2] NCCL INFO cudaDriverVersion 12010 +gpub024:181796:181796 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.124<0> 
+gpub024:181796:181796 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub024:181796:181926 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.124<0> +gpub024:181796:181926 [2] NCCL INFO Using network IB +gpub024:181796:181926 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub024:181796:181926 [2] NCCL INFO Trees [0] 19/-1/-1->18->17 [1] 19/-1/-1->18->17 +gpub024:181796:181926 [2] NCCL INFO Channel 00/0 : 18[85000] -> 19[c7000] via P2P/IPC +gpub024:181796:181926 [2] NCCL INFO Channel 01/0 : 18[85000] -> 19[c7000] via P2P/IPC +gpub024:181796:181926 [2] NCCL INFO Connected all rings +gpub024:181796:181926 [2] NCCL INFO Channel 00/0 : 18[85000] -> 17[46000] via P2P/IPC +gpub024:181796:181926 [2] NCCL INFO Channel 01/0 : 18[85000] -> 17[46000] via P2P/IPC +gpub024:181796:181926 [2] NCCL INFO Connected all trees +gpub052:2095166:2095166 [0] NCCL INFO cudaDriverVersion 12010 +gpub052:2095166:2095166 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.152<0> +gpub052:2095166:2095166 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub052:2095166:2095301 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.152<0> +gpub052:2095166:2095301 [0] NCCL INFO Using network IB +gpub052:2095166:2095301 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub052:2095166:2095301 [0] NCCL INFO Trees [0] 29/-1/-1->28->24 [1] 29/12/-1->28->60 +gpub052:2095166:2095301 [0] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0 +gpub052:2095166:2095301 [0] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0 +gpub052:2095166:2095301 [0] NCCL INFO Channel 00/0 : 28[7000] -> 29[46000] via P2P/IPC +gpub052:2095166:2095301 [0] NCCL INFO Channel 01/0 : 28[7000] -> 29[46000] via P2P/IPC +gpub052:2095166:2095301 [0] NCCL INFO Connected all rings +gpub024:181796:181926 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub024:181796:181926 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub024:181796:181926 [2] NCCL INFO comm 0x9361eb0 rank 18 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub021:390565:390565 [2] NCCL INFO cudaDriverVersion 12010 +gpub021:390565:390565 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.121<0> +gpub021:390565:390565 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub021:390565:390696 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.121<0> +gpub021:390565:390696 [2] NCCL INFO Using network IB +gpub021:390565:390696 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub021:390565:390696 [2] NCCL INFO Trees [0] 7/-1/-1->6->5 [1] 7/-1/-1->6->5 +gpub021:390565:390696 [2] NCCL INFO Channel 00/0 : 6[85000] -> 7[c7000] via P2P/IPC +gpub021:390565:390696 [2] NCCL INFO Channel 01/0 : 6[85000] -> 7[c7000] via P2P/IPC +gpub021:390565:390696 [2] NCCL INFO Connected all rings +gpub021:390565:390696 [2] NCCL INFO Channel 00/0 : 6[85000] -> 5[46000] via P2P/IPC +gpub021:390565:390696 [2] NCCL INFO Channel 01/0 : 6[85000] -> 5[46000] via P2P/IPC +gpub021:390565:390696 [2] NCCL INFO Connected all trees +gpub052:2095166:2095301 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [receive] via NET/IB/0 +gpub052:2095166:2095301 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [receive] via NET/IB/0 +gpub052:2095166:2095301 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [receive] via NET/IB/0 +gpub052:2095166:2095301 [0] NCCL INFO 
Channel 01/0 : 28[7000] -> 60[7000] [send] via NET/IB/0 +gpub052:2095166:2095301 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [send] via NET/IB/0 +gpub052:2095166:2095301 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [send] via NET/IB/0 +gpub052:2095166:2095301 [0] NCCL INFO Connected all trees +gpub052:2095166:2095301 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub052:2095166:2095301 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub052:2095166:2095301 [0] NCCL INFO comm 0x98eb2450 rank 28 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub021:390565:390696 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub021:390565:390696 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub021:390565:390696 [2] NCCL INFO comm 0x507a64a0 rank 6 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub054:3656062:3656062 [0] NCCL INFO cudaDriverVersion 12010 +gpub054:3656062:3656062 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.154<0> +gpub054:3656062:3656062 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub054:3656062:3656190 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.154<0> +gpub054:3656062:3656190 [0] NCCL INFO Using network IB +gpub054:3656062:3656190 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub054:3656062:3656190 [0] NCCL INFO Trees [0] 37/-1/-1->36->41 [1] 37/32/-1->36->44 +gpub054:3656062:3656190 [0] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0 +gpub054:3656062:3656190 [0] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0 +gpub054:3656062:3656190 [0] NCCL INFO Channel 00/0 : 36[7000] -> 37[46000] via P2P/IPC +gpub054:3656062:3656190 [0] NCCL INFO Channel 01/0 : 36[7000] -> 37[46000] via P2P/IPC +gpub054:3656062:3656190 [0] NCCL INFO Connected all rings +gpub054:3656062:3656190 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [receive] via NET/IB/0 +gpub054:3656062:3656190 [0] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [send] via NET/IB/0 +gpub054:3656062:3656190 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [send] via NET/IB/0 +gpub054:3656062:3656190 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [receive] via NET/IB/0 +gpub054:3656062:3656190 [0] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [receive] via NET/IB/0 +gpub054:3656062:3656190 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [send] via NET/IB/0 +gpub054:3656062:3656190 [0] NCCL INFO Connected all trees +gpub054:3656062:3656190 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub054:3656062:3656190 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub054:3656062:3656190 [0] NCCL INFO comm 0x83d6380 rank 36 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub007:2197526:2197526 [3] NCCL INFO cudaDriverVersion 12010 +gpub007:2197526:2197526 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.107<0> +gpub007:2197526:2197526 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub007:2197526:2197659 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.107<0> +gpub007:2197526:2197659 [3] NCCL INFO Using network IB +gpub007:2197526:2197659 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub007:2197526:2197659 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2 +gpub007:2197526:2197659 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0 +gpub007:2197526:2197659 [3] NCCL INFO Channel 
01/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0 +gpub007:2197526:2197659 [3] NCCL INFO Connected all rings +gpub007:2197526:2197659 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 2[85000] via P2P/IPC +gpub007:2197526:2197659 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 2[85000] via P2P/IPC +gpub007:2197526:2197659 [3] NCCL INFO Connected all trees +gpub007:2197526:2197659 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub007:2197526:2197659 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub007:2197526:2197659 [3] NCCL INFO comm 0xaeb3b350 rank 3 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub037:1697459:1697459 [3] NCCL INFO cudaDriverVersion 12010 +gpub037:1697459:1697459 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.137<0> +gpub037:1697459:1697459 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub037:1697459:1697594 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.137<0> +gpub037:1697459:1697594 [3] NCCL INFO Using network IB +gpub037:1697459:1697594 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub037:1697459:1697594 [3] NCCL INFO Trees [0] -1/-1/-1->23->22 [1] -1/-1/-1->23->22 +gpub037:1697459:1697594 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0 +gpub037:1697459:1697594 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0 +gpub037:1697459:1697594 [3] NCCL INFO Connected all rings +gpub037:1697459:1697594 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 22[85000] via P2P/IPC +gpub037:1697459:1697594 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 22[85000] via P2P/IPC +gpub037:1697459:1697594 [3] NCCL INFO Connected all trees +gpub037:1697459:1697594 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub037:1697459:1697594 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub037:1697459:1697594 [3] NCCL INFO comm 0x4f67f390 rank 23 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub007:2197523:2197658 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.107<0> +gpub007:2197523:2197658 [0] NCCL INFO Using network IB +gpub007:2197523:2197658 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub007:2197523:2197658 [0] NCCL INFO Channel 00/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpub007:2197523:2197658 [0] NCCL INFO Channel 01/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpub007:2197523:2197658 [0] NCCL INFO Trees [0] 1/32/-1->0->-1 [1] 1/-1/-1->0->4 +gpub007:2197523:2197658 [0] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0 +gpub007:2197523:2197658 [0] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0 +gpub007:2197523:2197658 [0] NCCL INFO Channel 00/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub007:2197523:2197658 [0] NCCL INFO Channel 01/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub007:2197523:2197658 [0] NCCL INFO Connected all rings +gpub007:2197523:2197658 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [send] via NET/IB/0 +gpub007:2197523:2197658 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [receive] via NET/IB/0 +gpub007:2197523:2197658 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [send] via NET/IB/0 +gpub007:2197523:2197658 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [receive] via NET/IB/0 +gpub007:2197523:2197658 [0] NCCL INFO Connected all trees +gpub007:2197523:2197658 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub007:2197523:2197658 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 
p2p channels per peer +gpub007:2197523:2197658 [0] NCCL INFO comm 0x8cd7e00 rank 0 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub073:545750:545750 [0] NCCL INFO cudaDriverVersion 12010 +gpub073:545750:545750 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.173<0> +gpub073:545750:545750 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub073:545750:545881 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.173<0> +gpub073:545750:545881 [0] NCCL INFO Using network IB +gpub073:545750:545881 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub073:545750:545881 [0] NCCL INFO Trees [0] 45/-1/-1->44->40 [1] 45/36/-1->44->29 +gpub073:545750:545881 [0] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0 +gpub073:545750:545881 [0] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0 +gpub073:545750:545881 [0] NCCL INFO Channel 00/0 : 44[7000] -> 45[46000] via P2P/IPC +gpub073:545750:545881 [0] NCCL INFO Channel 01/0 : 44[7000] -> 45[46000] via P2P/IPC +gpub073:545750:545881 [0] NCCL INFO Connected all rings +gpub073:545750:545881 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [receive] via NET/IB/0 +gpub073:545750:545881 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [receive] via NET/IB/0 +gpub073:545750:545881 [0] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [receive] via NET/IB/0 +gpub073:545750:545881 [0] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [send] via NET/IB/0 +gpub073:545750:545881 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [send] via NET/IB/0 +gpub073:545750:545881 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [send] via NET/IB/0 +gpub073:545750:545881 [0] NCCL INFO Connected all trees +gpub073:545750:545881 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub073:545750:545881 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub073:545750:545881 [0] NCCL INFO comm 0x8ee1c2d0 rank 44 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub073:545752:545752 [2] NCCL INFO cudaDriverVersion 12010 +gpub073:545752:545752 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.173<0> +gpub073:545752:545752 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub073:545752:545880 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.173<0> +gpub073:545752:545880 [2] NCCL INFO Using network IB +gpub073:545752:545880 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub073:545752:545880 [2] NCCL INFO Trees [0] 47/-1/-1->46->45 [1] 47/-1/-1->46->45 +gpub073:545752:545880 [2] NCCL INFO Channel 00/0 : 46[85000] -> 47[c7000] via P2P/IPC +gpub073:545752:545880 [2] NCCL INFO Channel 01/0 : 46[85000] -> 47[c7000] via P2P/IPC +gpub073:545752:545880 [2] NCCL INFO Connected all rings +gpub073:545752:545880 [2] NCCL INFO Channel 00/0 : 46[85000] -> 45[46000] via P2P/IPC +gpub073:545752:545880 [2] NCCL INFO Channel 01/0 : 46[85000] -> 45[46000] via P2P/IPC +gpub073:545752:545880 [2] NCCL INFO Connected all trees +gpub073:545752:545880 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub073:545752:545880 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub073:545752:545880 [2] NCCL INFO comm 0x9222770 rank 46 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub050:2358257:2358257 [3] NCCL INFO cudaDriverVersion 12010 +gpub050:2358257:2358257 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.150<0> +gpub050:2358257:2358257 [3] NCCL INFO NET/Plugin : No 
plugin found (libnccl-net.so), using internal implementation +gpub050:2358257:2358385 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.150<0> +gpub050:2358257:2358385 [3] NCCL INFO Using network IB +gpub050:2358257:2358385 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub050:2358257:2358385 [3] NCCL INFO Trees [0] -1/-1/-1->27->26 [1] -1/-1/-1->27->26 +gpub050:2358257:2358385 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0 +gpub050:2358257:2358385 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0 +gpub050:2358257:2358385 [3] NCCL INFO Connected all rings +gpub050:2358257:2358385 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 26[85000] via P2P/IPC +gpub050:2358257:2358385 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 26[85000] via P2P/IPC +gpub050:2358257:2358385 [3] NCCL INFO Connected all trees +gpub050:2358257:2358385 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub050:2358257:2358385 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub050:2358257:2358385 [3] NCCL INFO comm 0x1bc447d0 rank 27 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub066:1609738:1609738 [2] NCCL INFO cudaDriverVersion 12010 +gpub066:1609738:1609738 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.166<0> +gpub066:1609738:1609738 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub066:1609738:1609861 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.166<0> +gpub066:1609738:1609861 [2] NCCL INFO Using network IB +gpub066:1609738:1609861 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub066:1609738:1609861 [2] NCCL INFO Trees [0] 43/-1/-1->42->41 [1] 43/-1/-1->42->41 +gpub066:1609738:1609861 [2] NCCL INFO Channel 00/0 : 42[85000] -> 43[c7000] via P2P/IPC +gpub066:1609738:1609861 [2] NCCL INFO Channel 01/0 : 42[85000] -> 43[c7000] via P2P/IPC +gpub066:1609738:1609861 [2] NCCL INFO Connected all rings +gpub066:1609738:1609861 [2] NCCL INFO Channel 00/0 : 42[85000] -> 41[46000] via P2P/IPC +gpub066:1609738:1609861 [2] NCCL INFO Channel 01/0 : 42[85000] -> 41[46000] via P2P/IPC +gpub066:1609738:1609861 [2] NCCL INFO Connected all trees +gpub066:1609738:1609861 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub066:1609738:1609861 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub066:1609738:1609861 [2] NCCL INFO comm 0x98236b90 rank 42 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub066:1609739:1609739 [3] NCCL INFO cudaDriverVersion 12010 +gpub066:1609739:1609739 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.166<0> +gpub066:1609739:1609739 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub066:1609739:1609860 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.166<0> +gpub066:1609739:1609860 [3] NCCL INFO Using network IB +gpub066:1609739:1609860 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub066:1609739:1609860 [3] NCCL INFO Trees [0] -1/-1/-1->43->42 [1] -1/-1/-1->43->42 +gpub066:1609739:1609860 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0 +gpub066:1609739:1609860 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0 +gpub066:1609739:1609860 [3] NCCL INFO Connected all rings +gpub066:1609739:1609860 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 42[85000] via P2P/IPC +gpub066:1609739:1609860 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 42[85000] via P2P/IPC +gpub066:1609739:1609860 [3] NCCL 
INFO Connected all trees +gpub066:1609739:1609860 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub066:1609739:1609860 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub066:1609739:1609860 [3] NCCL INFO comm 0x50eb5f50 rank 43 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub066:1609737:1609737 [1] NCCL INFO cudaDriverVersion 12010 +gpub066:1609737:1609737 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.166<0> +gpub066:1609737:1609737 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub066:1609737:1609862 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.166<0> +gpub066:1609737:1609862 [1] NCCL INFO Using network IB +gpub066:1609737:1609862 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub066:1609737:1609862 [1] NCCL INFO Trees [0] 42/36/-1->41->40 [1] 42/-1/-1->41->40 +gpub066:1609737:1609862 [1] NCCL INFO Channel 00/0 : 41[46000] -> 42[85000] via P2P/IPC +gpub066:1609737:1609862 [1] NCCL INFO Channel 01/0 : 41[46000] -> 42[85000] via P2P/IPC +gpub066:1609737:1609862 [1] NCCL INFO Connected all rings +gpub066:1609737:1609862 [1] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [receive] via NET/IB/0 +gpub066:1609737:1609862 [1] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [send] via NET/IB/0 +gpub078:204475:204475 [0] NCCL INFO cudaDriverVersion 12010 +gpub078:204475:204475 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.178<0> +gpub078:204475:204475 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub078:204475:204626 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.178<0> +gpub078:204475:204626 [0] NCCL INFO Using network IB +gpub078:204475:204626 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub078:204475:204626 [0] NCCL INFO Trees [0] 57/60/-1->56->48 [1] 57/-1/-1->56->53 +gpub078:204475:204626 [0] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0 +gpub078:204475:204626 [0] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0 +gpub078:204475:204626 [0] NCCL INFO Channel 00/0 : 56[7000] -> 57[46000] via P2P/IPC +gpub078:204475:204626 [0] NCCL INFO Channel 01/0 : 56[7000] -> 57[46000] via P2P/IPC +gpub078:204475:204626 [0] NCCL INFO Connected all rings +gpub066:1609737:1609862 [1] NCCL INFO Channel 00/0 : 41[46000] -> 40[7000] via P2P/IPC +gpub066:1609737:1609862 [1] NCCL INFO Channel 01/0 : 41[46000] -> 40[7000] via P2P/IPC +gpub066:1609737:1609862 [1] NCCL INFO Connected all trees +gpub066:1609737:1609862 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub066:1609737:1609862 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub066:1609737:1609862 [1] NCCL INFO comm 0x8ebd08f0 rank 41 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub078:204475:204626 [0] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [receive] via NET/IB/0 +gpub078:204475:204626 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [send] via NET/IB/0 +gpub078:204475:204626 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [receive] via NET/IB/0 +gpub078:204475:204626 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [send] via NET/IB/0 +gpub078:204475:204626 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [receive] via NET/IB/0 +gpub078:204475:204626 [0] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [send] via NET/IB/0 +gpub078:204475:204626 [0] NCCL INFO Connected all trees +gpub078:204475:204626 [0] NCCL INFO threadThresholds 8/8/64 | 
512/8/64 | 512 | 512 +gpub078:204475:204626 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub078:204475:204626 [0] NCCL INFO comm 0x506b5c80 rank 56 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub078:204476:204476 [1] NCCL INFO cudaDriverVersion 12010 +gpub078:204476:204476 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.178<0> +gpub078:204476:204476 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub078:204476:204625 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.178<0> +gpub078:204476:204625 [1] NCCL INFO Using network IB +gpub078:204476:204625 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub078:204476:204625 [1] NCCL INFO Trees [0] 58/52/-1->57->56 [1] 58/-1/-1->57->56 +gpub078:204476:204625 [1] NCCL INFO Channel 00/0 : 57[46000] -> 58[85000] via P2P/IPC +gpub078:204476:204625 [1] NCCL INFO Channel 01/0 : 57[46000] -> 58[85000] via P2P/IPC +gpub078:204476:204625 [1] NCCL INFO Connected all rings +gpub078:204476:204625 [1] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [receive] via NET/IB/0 +gpub078:204476:204625 [1] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [send] via NET/IB/0 +gpub078:204476:204625 [1] NCCL INFO Channel 00/0 : 57[46000] -> 56[7000] via P2P/IPC +gpub078:204476:204625 [1] NCCL INFO Channel 01/0 : 57[46000] -> 56[7000] via P2P/IPC +gpub078:204476:204625 [1] NCCL INFO Connected all trees +gpub078:204476:204625 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub078:204476:204625 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub078:204476:204625 [1] NCCL INFO comm 0xba083a40 rank 57 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub066:1609736:1609736 [0] NCCL INFO cudaDriverVersion 12010 +gpub066:1609736:1609736 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.166<0> +gpub066:1609736:1609736 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub066:1609736:1609863 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.166<0> +gpub066:1609736:1609863 [0] NCCL INFO Using network IB +gpub066:1609736:1609863 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub066:1609736:1609863 [0] NCCL INFO Trees [0] 41/44/-1->40->49 [1] 41/-1/-1->40->37 +gpub066:1609736:1609863 [0] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0 +gpub066:1609736:1609863 [0] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0 +gpub066:1609736:1609863 [0] NCCL INFO Channel 00/0 : 40[7000] -> 41[46000] via P2P/IPC +gpub066:1609736:1609863 [0] NCCL INFO Channel 01/0 : 40[7000] -> 41[46000] via P2P/IPC +gpub066:1609736:1609863 [0] NCCL INFO Connected all rings +gpub066:1609736:1609863 [0] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [receive] via NET/IB/0 +gpub066:1609736:1609863 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [send] via NET/IB/0 +gpub066:1609736:1609863 [0] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [send] via NET/IB/0 +gpub066:1609736:1609863 [0] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [receive] via NET/IB/0 +gpub066:1609736:1609863 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [receive] via NET/IB/0 +gpub066:1609736:1609863 [0] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [send] via NET/IB/0 +gpub066:1609736:1609863 [0] NCCL INFO Connected all trees +gpub066:1609736:1609863 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub066:1609736:1609863 [0] NCCL INFO 2 coll channels, 2 p2p 
channels, 2 p2p channels per peer +gpub066:1609736:1609863 [0] NCCL INFO comm 0x8ce8640 rank 40 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub091:2047659:2047659 [1] NCCL INFO cudaDriverVersion 12010 +gpub091:2047659:2047659 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.191<0> +gpub091:2047659:2047659 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub091:2047659:2047795 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.191<0> +gpub091:2047659:2047795 [1] NCCL INFO Using network IB +gpub091:2047659:2047795 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub091:2047659:2047795 [1] NCCL INFO Trees [0] 62/-1/-1->61->60 [1] 62/-1/-1->61->60 +gpub091:2047659:2047795 [1] NCCL INFO Channel 00/0 : 61[46000] -> 62[85000] via P2P/IPC +gpub091:2047659:2047795 [1] NCCL INFO Channel 01/0 : 61[46000] -> 62[85000] via P2P/IPC +gpub091:2047659:2047795 [1] NCCL INFO Connected all rings +gpub091:2047659:2047795 [1] NCCL INFO Channel 00/0 : 61[46000] -> 60[7000] via P2P/IPC +gpub091:2047659:2047795 [1] NCCL INFO Channel 01/0 : 61[46000] -> 60[7000] via P2P/IPC +gpub091:2047659:2047795 [1] NCCL INFO Connected all trees +gpub091:2047659:2047795 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub091:2047659:2047795 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub091:2047659:2047795 [1] NCCL INFO comm 0x50f1f160 rank 61 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub037:1697457:1697457 [1] NCCL INFO cudaDriverVersion 12010 +gpub037:1697457:1697457 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.137<0> +gpub037:1697457:1697457 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub037:1697457:1697593 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.137<0> +gpub037:1697457:1697593 [1] NCCL INFO Using network IB +gpub037:1697457:1697593 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub037:1697457:1697593 [1] NCCL INFO Trees [0] 22/-1/-1->21->20 [1] 22/24/-1->21->20 +gpub037:1697457:1697593 [1] NCCL INFO Channel 00/0 : 21[46000] -> 22[85000] via P2P/IPC +gpub037:1697457:1697593 [1] NCCL INFO Channel 01/0 : 21[46000] -> 22[85000] via P2P/IPC +gpub037:1697457:1697593 [1] NCCL INFO Connected all rings +gpub037:1697457:1697593 [1] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [send] via NET/IB/0 +gpub037:1697457:1697593 [1] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [receive] via NET/IB/0 +gpub037:1697457:1697593 [1] NCCL INFO Channel 00/0 : 21[46000] -> 20[7000] via P2P/IPC +gpub037:1697457:1697593 [1] NCCL INFO Channel 01/0 : 21[46000] -> 20[7000] via P2P/IPC +gpub037:1697457:1697593 [1] NCCL INFO Connected all trees +gpub037:1697457:1697593 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub037:1697457:1697593 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub037:1697457:1697593 [1] NCCL INFO comm 0x50ddb3c0 rank 21 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub024:181795:181795 [1] NCCL INFO cudaDriverVersion 12010 +gpub024:181795:181795 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.124<0> +gpub024:181795:181795 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub024:181795:181929 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.124<0> +gpub024:181795:181929 [1] NCCL INFO Using network IB +gpub024:181795:181929 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub024:181795:181929 
[1] NCCL INFO Trees [0] 18/8/-1->17->16 [1] 18/-1/-1->17->16 +gpub024:181795:181929 [1] NCCL INFO Channel 00/0 : 17[46000] -> 18[85000] via P2P/IPC +gpub024:181795:181929 [1] NCCL INFO Channel 01/0 : 17[46000] -> 18[85000] via P2P/IPC +gpub024:181795:181929 [1] NCCL INFO Connected all rings +gpub024:181795:181929 [1] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [receive] via NET/IB/0 +gpub024:181795:181929 [1] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [send] via NET/IB/0 +gpub024:181795:181929 [1] NCCL INFO Channel 00/0 : 17[46000] -> 16[7000] via P2P/IPC +gpub024:181795:181929 [1] NCCL INFO Channel 01/0 : 17[46000] -> 16[7000] via P2P/IPC +gpub024:181795:181929 [1] NCCL INFO Connected all trees +gpub024:181795:181929 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub024:181795:181929 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub024:181795:181929 [1] NCCL INFO comm 0x9cf8ce0 rank 17 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub023:3256278:3256278 [3] NCCL INFO cudaDriverVersion 12010 +gpub023:3256278:3256278 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.123<0> +gpub023:3256278:3256278 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub023:3256278:3256407 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.123<0> +gpub023:3256278:3256407 [3] NCCL INFO Using network IB +gpub023:3256278:3256407 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub023:3256278:3256407 [3] NCCL INFO Trees [0] -1/-1/-1->15->14 [1] -1/-1/-1->15->14 +gpub023:3256278:3256407 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0 +gpub023:3256278:3256407 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0 +gpub023:3256278:3256407 [3] NCCL INFO Connected all rings +gpub023:3256278:3256407 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub023:3256278:3256407 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub023:3256278:3256407 [3] NCCL INFO Connected all trees +gpub023:3256278:3256407 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub023:3256278:3256407 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub023:3256278:3256407 [3] NCCL INFO comm 0x5130f2d0 rank 15 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub052:2095169:2095169 [3] NCCL INFO cudaDriverVersion 12010 +gpub052:2095169:2095169 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.152<0> +gpub052:2095169:2095169 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub052:2095169:2095299 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.152<0> +gpub052:2095169:2095299 [3] NCCL INFO Using network IB +gpub052:2095169:2095299 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub052:2095169:2095299 [3] NCCL INFO Trees [0] -1/-1/-1->31->30 [1] -1/-1/-1->31->30 +gpub052:2095169:2095299 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0 +gpub052:2095169:2095299 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0 +gpub052:2095169:2095299 [3] NCCL INFO Connected all rings +gpub052:2095169:2095299 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 30[85000] via P2P/IPC +gpub052:2095169:2095299 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 30[85000] via P2P/IPC +gpub052:2095169:2095299 [3] NCCL INFO Connected all trees +gpub052:2095169:2095299 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub052:2095169:2095299 [3] NCCL INFO 2 coll 
channels, 2 p2p channels, 2 p2p channels per peer
+gpub052:2095169:2095299 [3] NCCL INFO comm 0x93a3e60 rank 31 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub052:2095167:2095167 [1] NCCL INFO cudaDriverVersion 12010
+gpub052:2095167:2095167 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.152<0>
+gpub052:2095167:2095167 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub052:2095167:2095302 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.152<0>
+gpub052:2095167:2095302 [1] NCCL INFO Using network IB
+gpub052:2095167:2095302 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub052:2095167:2095302 [1] NCCL INFO Trees [0] 30/-1/-1->29->28 [1] 30/44/-1->29->28
+gpub052:2095167:2095302 [1] NCCL INFO Channel 00/0 : 29[46000] -> 30[85000] via P2P/IPC
+gpub052:2095167:2095302 [1] NCCL INFO Channel 01/0 : 29[46000] -> 30[85000] via P2P/IPC
+gpub052:2095167:2095302 [1] NCCL INFO Connected all rings
+gpub052:2095167:2095302 [1] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [send] via NET/IB/0
+gpub052:2095167:2095302 [1] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [receive] via NET/IB/0
+gpub091:2047658:2047658 [0] NCCL INFO cudaDriverVersion 12010
+gpub091:2047658:2047658 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.191<0>
+gpub091:2047658:2047658 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub091:2047658:2047793 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.191<0>
+gpub091:2047658:2047793 [0] NCCL INFO Using network IB
+gpub091:2047658:2047793 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub091:2047658:2047793 [0] NCCL INFO Trees [0] 61/-1/-1->60->56 [1] 61/28/-1->60->-1
+gpub091:2047658:2047793 [0] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0
+gpub091:2047658:2047793 [0] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0
+gpub091:2047658:2047793 [0] NCCL INFO Channel 00/0 : 60[7000] -> 61[46000] via P2P/IPC
+gpub091:2047658:2047793 [0] NCCL INFO Channel 01/0 : 60[7000] -> 61[46000] via P2P/IPC
+gpub091:2047658:2047793 [0] NCCL INFO Connected all rings
+gpub052:2095167:2095302 [1] NCCL INFO Channel 00/0 : 29[46000] -> 28[7000] via P2P/IPC
+gpub052:2095167:2095302 [1] NCCL INFO Channel 01/0 : 29[46000] -> 28[7000] via P2P/IPC
+gpub052:2095167:2095302 [1] NCCL INFO Connected all trees
+gpub052:2095167:2095302 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub052:2095167:2095302 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub052:2095167:2095302 [1] NCCL INFO comm 0x168bcad0 rank 29 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub091:2047658:2047793 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [receive] via NET/IB/0
+gpub091:2047658:2047793 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [receive] via NET/IB/0
+gpub091:2047658:2047793 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [send] via NET/IB/0
+gpub091:2047658:2047793 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [send] via NET/IB/0
+gpub091:2047658:2047793 [0] NCCL INFO Connected all trees
+gpub091:2047658:2047793 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub091:2047658:2047793 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub091:2047658:2047793 [0] NCCL INFO comm 0x50181450 rank 60 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub023:3256276:3256276 [1] NCCL INFO cudaDriverVersion 12010
+gpub023:3256276:3256276 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.123<0>
+gpub023:3256276:3256276 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub023:3256276:3256406 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.123<0>
+gpub023:3256276:3256406 [1] NCCL INFO Using network IB
+gpub023:3256276:3256406 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub023:3256276:3256406 [1] NCCL INFO Trees [0] 14/-1/-1->13->12 [1] 14/20/-1->13->12
+gpub023:3256276:3256406 [1] NCCL INFO Channel 00/0 : 13[46000] -> 14[85000] via P2P/IPC
+gpub023:3256276:3256406 [1] NCCL INFO Channel 01/0 : 13[46000] -> 14[85000] via P2P/IPC
+gpub023:3256276:3256406 [1] NCCL INFO Connected all rings
+gpub023:3256276:3256406 [1] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [send] via NET/IB/0
+gpub023:3256276:3256406 [1] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [receive] via NET/IB/0
+gpub023:3256276:3256406 [1] NCCL INFO Channel 00/0 : 13[46000] -> 12[7000] via P2P/IPC
+gpub023:3256276:3256406 [1] NCCL INFO Channel 01/0 : 13[46000] -> 12[7000] via P2P/IPC
+gpub023:3256276:3256406 [1] NCCL INFO Connected all trees
+gpub023:3256276:3256406 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub023:3256276:3256406 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub023:3256276:3256406 [1] NCCL INFO comm 0x8cb3a6d0 rank 13 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub091:2047661:2047661 [3] NCCL INFO cudaDriverVersion 12010
+gpub091:2047661:2047661 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.191<0>
+gpub091:2047661:2047661 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub091:2047661:2047794 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.191<0>
+gpub091:2047661:2047794 [3] NCCL INFO Using network IB
+gpub091:2047661:2047794 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub091:2047661:2047794 [3] NCCL INFO Trees [0] -1/-1/-1->63->62 [1] -1/-1/-1->63->62
+gpub091:2047661:2047794 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0
+gpub091:2047661:2047794 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0
+gpub091:2047661:2047794 [3] NCCL INFO Connected all rings
+gpub091:2047661:2047794 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 62[85000] via P2P/IPC
+gpub091:2047661:2047794 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 62[85000] via P2P/IPC
+gpub091:2047661:2047794 [3] NCCL INFO Connected all trees
+gpub091:2047661:2047794 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub091:2047661:2047794 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub091:2047661:2047794 [3] NCCL INFO comm 0x8cc3b130 rank 63 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub023:3256277:3256277 [2] NCCL INFO cudaDriverVersion 12010
+gpub023:3256277:3256277 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.123<0>
+gpub023:3256277:3256277 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub023:3256277:3256404 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.123<0>
+gpub023:3256277:3256404 [2] NCCL INFO Using network IB
+gpub023:3256277:3256404 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub023:3256277:3256404 [2] NCCL INFO Trees [0] 15/-1/-1->14->13 [1] 15/-1/-1->14->13
+gpub023:3256277:3256404 [2] NCCL INFO Channel 00/0 : 14[85000] -> 15[c7000] via P2P/IPC
+gpub023:3256277:3256404 [2] NCCL INFO Channel 01/0 : 14[85000] -> 15[c7000] via P2P/IPC
+gpub023:3256277:3256404 [2] NCCL INFO Connected all rings
+gpub023:3256277:3256404 [2] NCCL INFO Channel 00/0 : 14[85000] -> 13[46000] via P2P/IPC
+gpub023:3256277:3256404 [2] NCCL INFO Channel 01/0 : 14[85000] -> 13[46000] via P2P/IPC
+gpub023:3256277:3256404 [2] NCCL INFO Connected all trees
+gpub023:3256277:3256404 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub023:3256277:3256404 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub023:3256277:3256404 [2] NCCL INFO comm 0x4f2ef4b0 rank 14 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub023:3256275:3256275 [0] NCCL INFO cudaDriverVersion 12010
+gpub023:3256275:3256275 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.123<0>
+gpub023:3256275:3256275 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub023:3256275:3256405 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.123<0>
+gpub023:3256275:3256405 [0] NCCL INFO Using network IB
+gpub023:3256275:3256405 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub023:3256275:3256405 [0] NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->28
+gpub023:3256275:3256405 [0] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0
+gpub023:3256275:3256405 [0] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0
+gpub023:3256275:3256405 [0] NCCL INFO Channel 00/0 : 12[7000] -> 13[46000] via P2P/IPC
+gpub023:3256275:3256405 [0] NCCL INFO Channel 01/0 : 12[7000] -> 13[46000] via P2P/IPC
+gpub023:3256275:3256405 [0] NCCL INFO Connected all rings
+gpub023:3256275:3256405 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [receive] via NET/IB/0
+gpub023:3256275:3256405 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [receive] via NET/IB/0
+gpub023:3256275:3256405 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [send] via NET/IB/0
+gpub023:3256275:3256405 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [receive] via NET/IB/0
+gpub023:3256275:3256405 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [send] via NET/IB/0
+gpub023:3256275:3256405 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [send] via NET/IB/0
+gpub023:3256275:3256405 [0] NCCL INFO Connected all trees
+gpub023:3256275:3256405 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub023:3256275:3256405 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub023:3256275:3256405 [0] NCCL INFO comm 0xa128c40 rank 12 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub091:2047660:2047660 [2] NCCL INFO cudaDriverVersion 12010
+gpub091:2047660:2047660 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.191<0>
+gpub091:2047660:2047660 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub091:2047660:2047792 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.191<0>
+gpub091:2047660:2047792 [2] NCCL INFO Using network IB
+gpub091:2047660:2047792 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub091:2047660:2047792 [2] NCCL INFO Trees [0] 63/-1/-1->62->61 [1] 63/-1/-1->62->61
+gpub091:2047660:2047792 [2] NCCL INFO Channel 00/0 : 62[85000] -> 63[c7000] via P2P/IPC
+gpub091:2047660:2047792 [2] NCCL INFO Channel 01/0 : 62[85000] -> 63[c7000] via P2P/IPC
+gpub091:2047660:2047792 [2] NCCL INFO Connected all rings
+gpub091:2047660:2047792 [2] NCCL INFO Channel 00/0 : 62[85000] -> 61[46000] via P2P/IPC
+gpub091:2047660:2047792 [2] NCCL INFO Channel 01/0 : 62[85000] -> 61[46000] via P2P/IPC
+gpub091:2047660:2047792 [2] NCCL INFO Connected all trees
+gpub091:2047660:2047792 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub091:2047660:2047792 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub091:2047660:2047792 [2] NCCL INFO comm 0xbeb51a80 rank 62 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+[gpub007:0/64] 2023-07-10 05:23:10,955 (trainer:732) INFO: 31epoch:train:1-100batch: iter_time=1.183, forward_time=0.245, loss_ctc=83.079, loss_att=57.526, acc=0.711, loss=65.192, backward_time=1.046, grad_norm=110.657, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.182, optim0_lr0=6.565e-05, train_time=9.671
+[gpub007:0/64] 2023-07-10 05:25:37,047 (trainer:732) INFO: 31epoch:train:101-200batch: iter_time=1.109e-04, forward_time=0.143, loss_ctc=79.221, loss_att=65.254, acc=0.698, loss=69.444, backward_time=1.041, grad_norm=126.389, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.180, optim0_lr0=6.564e-05, train_time=2.923
+[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
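
The reducer warning above is emitted by PyTorch's DDP once per worker process, which is why it appears many times in quick succession. The remedy it suggests lives in the DDP constructor. A minimal illustrative sketch (not the ESPnet trainer code; the linear model and the function name are stand-ins) of constructing DDP without the extra autograd-graph traversal:

    import torch
    from torch.nn.parallel import DistributedDataParallel as DDP

    def build_ddp_model(local_rank: int) -> DDP:
        # Assumes torch.distributed has already been initialized with the
        # NCCL backend, as in the "Init COMPLETE" lines above (one process
        # per GPU).
        torch.cuda.set_device(local_rank)
        model = torch.nn.Linear(80, 512).to(f"cuda:{local_rank}")  # stand-in model
        return DDP(
            model,
            device_ids=[local_rank],
            # The warning is triggered by find_unused_parameters=True. If
            # every parameter really is used in each forward pass, False
            # skips the extra traversal of the autograd graph per iteration.
            find_unused_parameters=False,
        )
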
+[gpub007:0/64] 2023-07-10 05:28:01,147 (trainer:732) INFO: 31epoch:train:201-300batch: iter_time=1.051e-04, forward_time=0.143, loss_ctc=74.048, loss_att=56.198, acc=0.684, loss=61.553, backward_time=1.037, grad_norm=124.834, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.180, optim0_lr0=6.562e-05, train_time=2.882
+[gpub007:0/64] 2023-07-10 05:30:23,878 (trainer:732) INFO: 31epoch:train:301-400batch: iter_time=1.221e-04, forward_time=0.142, loss_ctc=68.222, loss_att=55.104, acc=0.713, loss=59.039, backward_time=1.037, grad_norm=98.763, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.179, optim0_lr0=6.561e-05, train_time=2.854
+[gpub007:0/64] 2023-07-10 05:32:49,468 (trainer:732) INFO: 31epoch:train:401-500batch: iter_time=1.193e-04, forward_time=0.143, loss_ctc=72.589, loss_att=59.138, acc=0.708, loss=63.173, backward_time=1.037, grad_norm=105.545, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.180, optim0_lr0=6.560e-05, train_time=2.912
+[gpub007:0/64] 2023-07-10 05:35:12,590 (trainer:732) INFO: 31epoch:train:501-600batch: iter_time=1.200e-04, forward_time=0.142, loss_ctc=78.429, loss_att=57.192, acc=0.714, loss=63.563, backward_time=1.029, grad_norm=115.940, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.180, optim0_lr0=6.559e-05, train_time=2.862
+[gpub007:0/64] 2023-07-10 05:37:33,602 (trainer:732) INFO: 31epoch:train:601-700batch: iter_time=1.086e-04, forward_time=0.142, loss_ctc=73.557, loss_att=60.681, acc=0.695, loss=64.544, backward_time=1.039, grad_norm=98.771, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.180, optim0_lr0=6.558e-05, train_time=2.820
+[gpub007:0/64] 2023-07-10 05:39:51,847 (trainer:732) INFO: 31epoch:train:701-800batch: iter_time=1.078e-04, forward_time=0.143, loss_ctc=78.927, loss_att=60.176, acc=0.707, loss=65.802, backward_time=1.027, grad_norm=101.784, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.180, optim0_lr0=6.557e-05, train_time=2.765
+[gpub007:0/64] 2023-07-10 05:41:04,583 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
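
A quick arithmetic check on the trainer lines above: each reported loss equals an interpolation of loss_ctc and loss_att with a CTC weight of 0.3. The weight is inferred from the logged values themselves, not read from the training config, as the short snippet below verifies.

    # Verify the hybrid CTC/attention interpolation against the 201-300batch line.
    ctc_weight = 0.3  # inferred from the logged numbers; an assumption, not the config value
    loss_ctc, loss_att = 74.048, 56.198
    loss = ctc_weight * loss_ctc + (1 - ctc_weight) * loss_att
    print(f"{loss:.3f}")  # -> 61.553, matching the logged loss
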
+[gpub007:0/64] 2023-07-10 05:41:22,351 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-10 05:41:25,696 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-10 05:41:25,696 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpub007:0/64] 2023-07-10 05:41:25,818 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-10 05:46:54,397 (trainer:732) INFO: 31epoch:train:801-900batch: iter_time=2.610, forward_time=0.147, loss_ctc=84.353, loss_att=59.212, acc=0.713, loss=66.754, backward_time=1.050, grad_norm=113.914, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.179, optim0_lr0=6.556e-05, train_time=8.451
+[gpub007:0/64] 2023-07-10 05:49:10,529 (trainer:732) INFO: 31epoch:train:901-1000batch: iter_time=1.215e-04, forward_time=0.144, loss_ctc=80.485, loss_att=64.454, acc=0.685, loss=69.263, backward_time=1.026, grad_norm=113.670, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.180, optim0_lr0=6.555e-05, train_time=2.722
+[gpub007:0/64] 2023-07-10 05:51:25,984 (trainer:732) INFO: 31epoch:train:1001-1100batch: iter_time=1.238e-04, forward_time=0.144, loss_ctc=73.551, loss_att=55.639, acc=0.676, loss=61.013, backward_time=1.024, grad_norm=112.044, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.180, optim0_lr0=6.553e-05, train_time=2.709
+[gpub007:0/64] 2023-07-10 05:53:41,424 (trainer:732) INFO: 31epoch:train:1101-1200batch: iter_time=1.319e-04, forward_time=0.146, loss_ctc=72.723, loss_att=58.415, acc=0.710, loss=62.707, backward_time=1.025, grad_norm=111.492, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.181, optim0_lr0=6.552e-05, train_time=2.709
+[gpub007:0/64] 2023-07-10 05:55:57,268 (trainer:732) INFO: 31epoch:train:1201-1300batch: iter_time=1.616e-04, forward_time=0.147, loss_ctc=66.671, loss_att=53.081, acc=0.717, loss=57.158, backward_time=1.027, grad_norm=98.750, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.181, optim0_lr0=6.551e-05, train_time=2.717
+[gpub007:0/64] 2023-07-10 05:58:12,720 (trainer:732) INFO: 31epoch:train:1301-1400batch: iter_time=1.501e-04, forward_time=0.147, loss_ctc=76.363, loss_att=59.188, acc=0.694, loss=64.341, backward_time=1.025, grad_norm=114.733, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.181, optim0_lr0=6.550e-05, train_time=2.709
+[gpub007:0/64] 2023-07-10 06:00:28,243 (trainer:732) INFO: 31epoch:train:1401-1500batch: iter_time=1.642e-04, forward_time=0.148, loss_ctc=75.285, loss_att=58.303, acc=0.706, loss=63.398, backward_time=1.026, grad_norm=103.269, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.181, optim0_lr0=6.549e-05, train_time=2.710
+[gpub007:0/64] 2023-07-10 06:02:43,552 (trainer:732) INFO: 31epoch:train:1501-1600batch: iter_time=1.357e-04, forward_time=0.145, loss_ctc=74.621, loss_att=54.813, acc=0.698, loss=60.756, backward_time=1.022, grad_norm=114.863, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.180, optim0_lr0=6.548e-05, train_time=2.706
+[gpub007:0/64] 2023-07-10 06:04:14,468 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub007:0/64] 2023-07-10 06:04:32,666 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-10 06:04:36,091 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-10 06:04:36,091 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub007:0/64] 2023-07-10 06:04:36,097 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-10 06:10:05,276 (trainer:732) INFO: 31epoch:train:1601-1700batch: iter_time=1.268, forward_time=0.177, loss_ctc=83.861, loss_att=62.946, acc=0.716, loss=69.221, backward_time=1.036, grad_norm=111.788, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.182, optim0_lr0=6.547e-05, train_time=8.834
+[gpub007:0/64] 2023-07-10 06:12:21,570 (trainer:732) INFO: 31epoch:train:1701-1800batch: iter_time=1.262e-04, forward_time=0.144, loss_ctc=84.358, loss_att=61.690, acc=0.703, loss=68.491, backward_time=1.027, grad_norm=128.704, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.180, optim0_lr0=6.546e-05, train_time=2.726
+[gpub007:0/64] 2023-07-10 06:14:37,387 (trainer:732) INFO: 31epoch:train:1801-1900batch: iter_time=1.264e-04, forward_time=0.144, loss_ctc=77.029, loss_att=58.693, acc=0.688, loss=64.194, backward_time=1.022, grad_norm=100.689, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.179, optim0_lr0=6.544e-05, train_time=2.716
+[gpub007:0/64] 2023-07-10 06:16:53,035 (trainer:732) INFO: 31epoch:train:1901-2000batch: iter_time=1.245e-04, forward_time=0.145, loss_ctc=68.921, loss_att=52.419, acc=0.709, loss=57.370, backward_time=1.023, grad_norm=108.763, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.179, optim0_lr0=6.543e-05, train_time=2.713
+[gpub007:0/64] 2023-07-10 06:19:08,914 (trainer:732) INFO: 31epoch:train:2001-2100batch: iter_time=1.209e-04, forward_time=0.146, loss_ctc=68.731, loss_att=53.877, acc=0.724, loss=58.333, backward_time=1.025, grad_norm=116.687, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.179, optim0_lr0=6.542e-05, train_time=2.717
+[gpub007:0/64] 2023-07-10 06:21:24,934 (trainer:732) INFO: 31epoch:train:2101-2200batch: iter_time=1.289e-04, forward_time=0.146, loss_ctc=77.295, loss_att=63.073, acc=0.695, loss=67.340, backward_time=1.026, grad_norm=121.316, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.541e-05, train_time=2.720
+[gpub007:0/64] 2023-07-10 06:23:41,908 (trainer:732) INFO: 31epoch:train:2201-2300batch: iter_time=1.286e-04, forward_time=0.146, loss_ctc=76.729, loss_att=58.069, acc=0.719, loss=63.667, backward_time=1.028, grad_norm=96.608, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.540e-05, train_time=2.739
+[gpub007:0/64] 2023-07-10 06:26:08,069 (trainer:732) INFO: 31epoch:train:2301-2400batch: iter_time=1.143e-04, forward_time=0.145, loss_ctc=69.112, loss_att=51.060, acc=0.720, loss=56.476, backward_time=1.035, grad_norm=91.312, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.539e-05, train_time=2.923
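
The sampler summaries above (N-batch=37994, batch_size=128, mean=128.0, min=128, max=129) describe fixed-size, unsorted mini-batches built from one shard's key file. A rough sketch of that bookkeeping (an illustration of the logged behavior, not ESPnet's UnsortedBatchSampler implementation; the helper name is mine, and the key file is assumed to hold one utterance ID per line):

    import math

    def unsorted_batches(key_file: str, batch_size: int = 128) -> list[list[str]]:
        # Read utterance IDs in file order; no sorting by length.
        with open(key_file) as f:
            keys = [line.split()[0] for line in f if line.strip()]
        n_batch = math.ceil(len(keys) / batch_size)
        # Spread the keys as evenly as possible across n_batch batches, so
        # batch sizes differ by at most one, consistent with the min=128,
        # max=129 reported above.
        return [keys[i::n_batch] for i in range(n_batch)]
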
+[gpub007:0/64] 2023-07-10 06:28:26,644 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub007:0/64] 2023-07-10 06:28:44,575 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-10 06:28:48,049 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-10 06:28:48,049 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpub007:0/64] 2023-07-10 06:28:48,056 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-10 06:32:15,761 (trainer:732) INFO: 31epoch:train:2401-2500batch: iter_time=1.333, forward_time=0.155, loss_ctc=82.287, loss_att=63.709, acc=0.701, loss=69.282, backward_time=1.033, grad_norm=121.817, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.538e-05, train_time=7.352
+[gpub007:0/64] 2023-07-10 06:34:33,345 (trainer:732) INFO: 31epoch:train:2501-2600batch: iter_time=1.029e-04, forward_time=0.146, loss_ctc=83.004, loss_att=63.233, acc=0.707, loss=69.164, backward_time=1.032, grad_norm=133.547, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.537e-05, train_time=2.753
+[gpub007:0/64] 2023-07-10 06:36:49,325 (trainer:732) INFO: 31epoch:train:2601-2700batch: iter_time=1.102e-04, forward_time=0.144, loss_ctc=75.378, loss_att=57.609, acc=0.688, loss=62.940, backward_time=1.026, grad_norm=107.810, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.535e-05, train_time=2.719
+[gpub007:0/64] 2023-07-10 06:39:04,998 (trainer:732) INFO: 31epoch:train:2701-2800batch: iter_time=9.780e-05, forward_time=0.145, loss_ctc=69.226, loss_att=52.124, acc=0.709, loss=57.255, backward_time=1.024, grad_norm=97.581, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.534e-05, train_time=2.713
+[gpub007:0/64] 2023-07-10 06:41:21,063 (trainer:732) INFO: 31epoch:train:2801-2900batch: iter_time=1.099e-04, forward_time=0.144, loss_ctc=68.771, loss_att=53.569, acc=0.723, loss=58.130, backward_time=1.026, grad_norm=95.572, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.533e-05, train_time=2.721
+[gpub007:0/64] 2023-07-10 06:43:36,896 (trainer:732) INFO: 31epoch:train:2901-3000batch: iter_time=1.308e-04, forward_time=0.145, loss_ctc=77.273, loss_att=63.630, acc=0.695, loss=67.723, backward_time=1.025, grad_norm=111.271, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.532e-05, train_time=2.716
+[gpub007:0/64] 2023-07-10 06:45:52,595 (trainer:732) INFO: 31epoch:train:3001-3100batch: iter_time=1.373e-04, forward_time=0.145, loss_ctc=74.247, loss_att=57.709, acc=0.714, loss=62.671, backward_time=1.025, grad_norm=107.453, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.531e-05, train_time=2.714
+[gpub007:0/64] 2023-07-10 06:48:08,299 (trainer:732) INFO: 31epoch:train:3101-3200batch: iter_time=1.367e-04, forward_time=0.146, loss_ctc=69.916, loss_att=51.295, acc=0.719, loss=56.881, backward_time=1.025, grad_norm=117.528, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.530e-05, train_time=2.714
+[gpub007:0/64] 2023-07-10 06:50:25,122 (trainer:732) INFO: 31epoch:train:3201-3300batch: iter_time=1.503e-04, forward_time=0.146, loss_ctc=81.122, loss_att=63.230, acc=0.700, loss=68.598, backward_time=1.025, grad_norm=121.151, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.529e-05, train_time=2.736
+[gpub007:0/64] 2023-07-10 06:51:12,584 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub007:0/64] 2023-07-10 06:51:30,720 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-10 06:51:34,138 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-10 06:51:34,139 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub007:0/64] 2023-07-10 06:51:34,145 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-10 06:57:49,606 (trainer:732) INFO: 31epoch:train:3301-3400batch: iter_time=1.315, forward_time=0.145, loss_ctc=79.129, loss_att=54.895, acc=0.716, loss=62.165, backward_time=1.040, grad_norm=112.164, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.528e-05, train_time=8.889
+[gpub007:0/64] 2023-07-10 07:00:05,807 (trainer:732) INFO: 31epoch:train:3401-3500batch: iter_time=1.349e-04, forward_time=0.146, loss_ctc=78.453, loss_att=63.141, acc=0.690, loss=67.735, backward_time=1.027, grad_norm=113.408, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.527e-05, train_time=2.724
+[gpub007:0/64] 2023-07-10 07:02:22,047 (trainer:732) INFO: 31epoch:train:3501-3600batch: iter_time=1.149e-04, forward_time=0.145, loss_ctc=71.009, loss_att=53.688, acc=0.682, loss=58.884, backward_time=1.026, grad_norm=125.002, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.525e-05, train_time=2.725
+[gpub007:0/64] 2023-07-10 07:04:37,666 (trainer:732) INFO: 31epoch:train:3601-3700batch: iter_time=1.182e-04, forward_time=0.144, loss_ctc=71.425, loss_att=57.283, acc=0.713, loss=61.525, backward_time=1.025, grad_norm=109.352, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.524e-05, train_time=2.712
+[gpub007:0/64] 2023-07-10 07:06:53,025 (trainer:732) INFO: 31epoch:train:3701-3800batch: iter_time=1.310e-04, forward_time=0.144, loss_ctc=66.441, loss_att=52.903, acc=0.720, loss=56.964, backward_time=1.022, grad_norm=104.171, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.523e-05, train_time=2.707
+[gpub007:0/64] 2023-07-10 07:09:08,320 (trainer:732) INFO: 31epoch:train:3801-3900batch: iter_time=1.332e-04, forward_time=0.143, loss_ctc=75.756, loss_att=59.384, acc=0.694, loss=64.296, backward_time=1.022, grad_norm=121.042, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.522e-05, train_time=2.706
+[gpub007:0/64] 2023-07-10 07:11:24,379 (trainer:732) INFO: 31epoch:train:3901-4000batch: iter_time=1.335e-04, forward_time=0.145, loss_ctc=73.930, loss_att=57.143, acc=0.710, loss=62.179, backward_time=1.024, grad_norm=126.698, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.521e-05, train_time=2.721
+[gpub007:0/64] 2023-07-10 07:13:44,981 (trainer:732) INFO: 31epoch:train:4001-4100batch: iter_time=1.409e-04, forward_time=0.145, loss_ctc=70.275, loss_att=52.938, acc=0.709, loss=58.139, backward_time=1.033, grad_norm=126.609, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.520e-05, train_time=2.812
+[gpub007:0/64] 2023-07-10 07:15:16,971 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub007:0/64] 2023-07-10 07:15:34,896 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-10 07:15:38,324 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-10 07:15:38,324 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub007:0/64] 2023-07-10 07:15:38,331 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-10 07:19:40,883 (trainer:732) INFO: 31epoch:train:4101-4200batch: iter_time=1.314, forward_time=0.189, loss_ctc=82.970, loss_att=61.416, acc=0.719, loss=67.882, backward_time=1.037, grad_norm=108.322, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.181, optim0_lr0=6.519e-05, train_time=7.118
+[gpub007:0/64] 2023-07-10 07:21:57,691 (trainer:732) INFO: 31epoch:train:4201-4300batch: iter_time=1.415e-04, forward_time=0.147, loss_ctc=83.531, loss_att=60.252, acc=0.708, loss=67.236, backward_time=1.028, grad_norm=125.113, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.179, optim0_lr0=6.518e-05, train_time=2.736
+[gpub007:0/64] 2023-07-10 07:24:16,169 (trainer:732) INFO: 31epoch:train:4301-4400batch: iter_time=1.291e-04, forward_time=0.146, loss_ctc=75.635, loss_att=58.019, acc=0.697, loss=63.304, backward_time=1.029, grad_norm=123.408, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.517e-05, train_time=2.769
+[gpub007:0/64] 2023-07-10 07:26:35,350 (trainer:732) INFO: 31epoch:train:4401-4500batch: iter_time=1.082e-04, forward_time=0.146, loss_ctc=67.863, loss_att=51.370, acc=0.716, loss=56.318, backward_time=1.030, grad_norm=91.973, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.515e-05, train_time=2.783
+[gpub007:0/64] 2023-07-10 07:28:58,182 (trainer:732) INFO: 31epoch:train:4501-4600batch: iter_time=1.202e-04, forward_time=0.145, loss_ctc=67.304, loss_att=53.372, acc=0.726, loss=57.551, backward_time=1.040, grad_norm=97.524, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.514e-05, train_time=2.856
+[gpub007:0/64] 2023-07-10 07:31:14,230 (trainer:732) INFO: 31epoch:train:4601-4700batch: iter_time=1.212e-04, forward_time=0.146, loss_ctc=74.767, loss_att=61.094, acc=0.703, loss=65.196, backward_time=1.028, grad_norm=120.756, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.513e-05, train_time=2.721
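
The per-100-batch trainer lines follow a fixed key=value layout, which makes them easy to scrape for monitoring or plotting. A small illustrative helper (the regex and the function name are mine; the field names are exactly those printed above):

    import re

    KV_RE = re.compile(r"(\w+)=([0-9.e+-]+)")

    def parse_trainer_line(line: str) -> dict[str, float]:
        # Pull key=value pairs such as loss_ctc=74.767 or iter_time=1.212e-04
        # out of a "(trainer:732) INFO:" line like the ones above.
        return {key: float(value) for key, value in KV_RE.findall(line)}

    stats = parse_trainer_line("loss_ctc=74.767, loss_att=61.094, acc=0.703, loss=65.196, train_time=2.721")
    print(stats["loss"], stats["acc"])  # 65.196 0.703
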
forward_time=0.146, loss_ctc=74.767, loss_att=61.094, acc=0.703, loss=65.196, backward_time=1.028, grad_norm=120.756, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.513e-05, train_time=2.721 +[gpub007:0/64] 2023-07-10 07:33:33,530 (trainer:732) INFO: 31epoch:train:4701-4800batch: iter_time=1.204e-04, forward_time=0.146, loss_ctc=75.438, loss_att=57.519, acc=0.721, loss=62.895, backward_time=1.031, grad_norm=95.142, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.512e-05, train_time=2.786 +[gpub007:0/64] 2023-07-10 07:35:55,596 (trainer:732) INFO: 31epoch:train:4801-4900batch: iter_time=1.418e-04, forward_time=0.145, loss_ctc=68.786, loss_att=51.083, acc=0.717, loss=56.394, backward_time=1.030, grad_norm=92.529, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.511e-05, train_time=2.841 +[gpub007:0/64] 2023-07-10 07:38:20,546 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub007:0/64] 2023-07-10 07:38:38,979 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 07:38:42,415 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 07:38:42,415 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub007:0/64] 2023-07-10 07:38:42,421 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 07:41:52,558 (trainer:732) INFO: 31epoch:train:4901-5000batch: iter_time=2.120, forward_time=0.145, loss_ctc=81.459, loss_att=62.737, acc=0.703, loss=68.354, backward_time=1.031, grad_norm=105.027, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.510e-05, train_time=7.138 +[gpub007:0/64] 2023-07-10 07:44:10,065 (trainer:732) INFO: 31epoch:train:5001-5100batch: iter_time=1.238e-04, forward_time=0.144, loss_ctc=78.911, loss_att=55.235, acc=0.708, loss=62.338, backward_time=1.030, grad_norm=120.308, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.509e-05, train_time=2.751 +[gpub007:0/64] 2023-07-10 07:46:26,365 (trainer:732) INFO: 31epoch:train:5101-5200batch: iter_time=1.249e-04, forward_time=0.144, loss_ctc=77.632, loss_att=62.702, acc=0.705, loss=67.181, backward_time=1.027, grad_norm=108.082, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.179, optim0_lr0=6.508e-05, train_time=2.726 +[gpub007:0/64] 2023-07-10 07:48:41,519 (trainer:732) INFO: 31epoch:train:5201-5300batch: iter_time=1.358e-04, forward_time=0.144, loss_ctc=71.878, loss_att=53.872, acc=0.682, loss=59.274, backward_time=1.022, grad_norm=98.262, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.179, optim0_lr0=6.507e-05, train_time=2.703 +[gpub007:0/64] 2023-07-10 07:50:56,946 (trainer:732) INFO: 31epoch:train:5301-5400batch: iter_time=1.217e-04, forward_time=0.145, loss_ctc=65.458, loss_att=52.277, acc=0.719, loss=56.231, backward_time=1.022, grad_norm=89.313, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.179, optim0_lr0=6.506e-05, 
train_time=2.708 +[gpub007:0/64] 2023-07-10 07:53:12,319 (trainer:732) INFO: 31epoch:train:5401-5500batch: iter_time=1.247e-04, forward_time=0.144, loss_ctc=70.341, loss_att=56.945, acc=0.714, loss=60.964, backward_time=1.023, grad_norm=99.924, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.179, optim0_lr0=6.504e-05, train_time=2.707 +[gpub007:0/64] 2023-07-10 07:55:27,908 (trainer:732) INFO: 31epoch:train:5501-5600batch: iter_time=1.189e-04, forward_time=0.144, loss_ctc=75.973, loss_att=54.691, acc=0.715, loss=61.076, backward_time=1.024, grad_norm=113.552, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.179, optim0_lr0=6.503e-05, train_time=2.712 +[gpub007:0/64] 2023-07-10 07:57:43,611 (trainer:732) INFO: 31epoch:train:5601-5700batch: iter_time=1.237e-04, forward_time=0.145, loss_ctc=70.888, loss_att=58.277, acc=0.697, loss=62.060, backward_time=1.024, grad_norm=99.138, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.179, optim0_lr0=6.502e-05, train_time=2.714 +[gpub007:0/64] 2023-07-10 07:59:58,703 (trainer:732) INFO: 31epoch:train:5701-5800batch: iter_time=1.292e-04, forward_time=0.143, loss_ctc=78.522, loss_att=58.973, acc=0.707, loss=64.838, backward_time=1.022, grad_norm=105.973, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.179, optim0_lr0=6.501e-05, train_time=2.702 +[gpub007:0/64] 2023-07-10 08:00:43,914 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub007:0/64] 2023-07-10 08:01:01,700 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 08:01:05,118 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 08:01:05,118 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub007:0/64] 2023-07-10 08:01:05,124 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 08:06:45,413 (trainer:732) INFO: 31epoch:train:5801-5900batch: iter_time=1.267, forward_time=0.145, loss_ctc=82.584, loss_att=58.726, acc=0.720, loss=65.883, backward_time=1.045, grad_norm=115.873, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.500e-05, train_time=8.134 +[gpub007:0/64] 2023-07-10 08:09:01,769 (trainer:732) INFO: 31epoch:train:5901-6000batch: iter_time=1.203e-04, forward_time=0.143, loss_ctc=80.148, loss_att=61.257, acc=0.703, loss=66.925, backward_time=1.027, grad_norm=120.235, clip=100.000, loss_scale=6.190e+26, optim_step_time=0.180, optim0_lr0=6.499e-05, train_time=2.727 +[gpub007:0/64] 2023-07-10 08:11:17,607 (trainer:732) INFO: 31epoch:train:6001-6100batch: iter_time=1.214e-04, forward_time=0.144, loss_ctc=72.256, loss_att=54.572, acc=0.693, loss=59.877, backward_time=1.025, grad_norm=124.845, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.180, optim0_lr0=6.498e-05, train_time=2.717 +[gpub007:0/64] 2023-07-10 08:13:33,227 (trainer:732) INFO: 31epoch:train:6101-6200batch: iter_time=1.243e-04, forward_time=0.145, loss_ctc=70.336, loss_att=55.625, acc=0.725, 
loss=60.038, backward_time=1.026, grad_norm=93.646, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.180, optim0_lr0=6.497e-05, train_time=2.712 +[gpub007:0/64] 2023-07-10 08:15:48,885 (trainer:732) INFO: 31epoch:train:6201-6300batch: iter_time=1.307e-04, forward_time=0.144, loss_ctc=65.446, loss_att=49.376, acc=0.730, loss=54.197, backward_time=1.026, grad_norm=98.134, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.180, optim0_lr0=6.496e-05, train_time=2.713 +[gpub007:0/64] 2023-07-10 08:18:04,660 (trainer:732) INFO: 31epoch:train:6301-6400batch: iter_time=1.331e-04, forward_time=0.145, loss_ctc=74.073, loss_att=57.696, acc=0.714, loss=62.609, backward_time=1.026, grad_norm=115.089, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.180, optim0_lr0=6.495e-05, train_time=2.715 +[gpub007:0/64] 2023-07-10 08:20:20,132 (trainer:732) INFO: 31epoch:train:6401-6500batch: iter_time=1.288e-04, forward_time=0.145, loss_ctc=74.540, loss_att=58.242, acc=0.710, loss=63.131, backward_time=1.024, grad_norm=105.523, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.180, optim0_lr0=6.493e-05, train_time=2.709 +[gpub007:0/64] 2023-07-10 08:22:35,251 (trainer:732) INFO: 31epoch:train:6501-6600batch: iter_time=1.212e-04, forward_time=0.144, loss_ctc=72.646, loss_att=54.097, acc=0.710, loss=59.662, backward_time=1.022, grad_norm=107.686, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.180, optim0_lr0=6.492e-05, train_time=2.702 +[gpub007:0/64] 2023-07-10 08:24:05,054 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub007:0/64] 2023-07-10 08:24:23,089 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 08:24:26,541 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 08:24:26,541 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub007:0/64] 2023-07-10 08:24:26,547 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 08:29:44,559 (trainer:732) INFO: 31epoch:train:6601-6700batch: iter_time=1.282, forward_time=0.210, loss_ctc=83.654, loss_att=62.545, acc=0.723, loss=68.878, backward_time=1.041, grad_norm=123.852, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.186, optim0_lr0=6.491e-05, train_time=8.586 +[gpub007:0/64] 2023-07-10 08:32:01,287 (trainer:732) INFO: 31epoch:train:6701-6800batch: iter_time=1.242e-04, forward_time=0.144, loss_ctc=83.130, loss_att=63.649, acc=0.692, loss=69.493, backward_time=1.027, grad_norm=148.253, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.180, optim0_lr0=6.490e-05, train_time=2.734 +[gpub007:0/64] 2023-07-10 08:34:16,947 (trainer:732) INFO: 31epoch:train:6801-6900batch: iter_time=1.192e-04, forward_time=0.143, loss_ctc=73.381, loss_att=55.522, acc=0.697, loss=60.880, backward_time=1.026, grad_norm=109.650, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.180, optim0_lr0=6.489e-05, train_time=2.713 +[gpub007:0/64] 2023-07-10 08:36:32,545 (trainer:732) 
INFO: 31epoch:train:6901-7000batch: iter_time=1.086e-04, forward_time=0.145, loss_ctc=66.644, loss_att=49.879, acc=0.710, loss=54.909, backward_time=1.025, grad_norm=106.383, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.180, optim0_lr0=6.488e-05, train_time=2.712 +[gpub007:0/64] 2023-07-10 08:38:48,247 (trainer:732) INFO: 31epoch:train:7001-7100batch: iter_time=1.357e-04, forward_time=0.144, loss_ctc=67.145, loss_att=53.493, acc=0.726, loss=57.588, backward_time=1.026, grad_norm=91.546, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.179, optim0_lr0=6.487e-05, train_time=2.714 +[gpub007:0/64] 2023-07-10 08:41:03,720 (trainer:732) INFO: 31epoch:train:7101-7200batch: iter_time=1.105e-04, forward_time=0.143, loss_ctc=75.932, loss_att=61.631, acc=0.690, loss=65.921, backward_time=1.024, grad_norm=130.659, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.179, optim0_lr0=6.486e-05, train_time=2.709 +[gpub007:0/64] 2023-07-10 08:43:19,555 (trainer:732) INFO: 31epoch:train:7201-7300batch: iter_time=1.296e-04, forward_time=0.143, loss_ctc=75.534, loss_att=57.506, acc=0.721, loss=62.914, backward_time=1.029, grad_norm=103.295, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.179, optim0_lr0=6.485e-05, train_time=2.716 +[gpub007:0/64] 2023-07-10 08:45:34,885 (trainer:732) INFO: 31epoch:train:7301-7400batch: iter_time=1.103e-04, forward_time=0.143, loss_ctc=68.888, loss_att=51.318, acc=0.711, loss=56.589, backward_time=1.025, grad_norm=100.928, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.180, optim0_lr0=6.484e-05, train_time=2.706 +[gpub007:0/64] 2023-07-10 08:48:00,624 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpub007:0/64] 2023-07-10 08:48:18,954 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 08:48:22,400 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 08:48:22,401 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub007:0/64] 2023-07-10 08:48:22,407 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 08:51:10,030 (trainer:732) INFO: 31epoch:train:7401-7500batch: iter_time=1.625, forward_time=0.145, loss_ctc=80.592, loss_att=61.550, acc=0.707, loss=67.263, backward_time=1.040, grad_norm=113.468, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.179, optim0_lr0=6.483e-05, train_time=6.702 +[gpub007:0/64] 2023-07-10 08:53:27,886 (trainer:732) INFO: 31epoch:train:7501-7600batch: iter_time=1.200e-04, forward_time=0.144, loss_ctc=81.844, loss_att=63.816, acc=0.691, loss=69.224, backward_time=1.032, grad_norm=115.900, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.179, optim0_lr0=6.481e-05, train_time=2.757 +[gpub007:0/64] 2023-07-10 08:55:43,335 (trainer:732) INFO: 31epoch:train:7601-7700batch: iter_time=1.393e-04, forward_time=0.143, loss_ctc=75.429, loss_att=57.641, acc=0.689, loss=62.978, backward_time=1.022, grad_norm=105.578, clip=100.000, 
loss_scale=1.238e+27, optim_step_time=0.179, optim0_lr0=6.480e-05, train_time=2.709 +[gpub007:0/64] 2023-07-10 08:57:58,931 (trainer:732) INFO: 31epoch:train:7701-7800batch: iter_time=1.332e-04, forward_time=0.144, loss_ctc=66.291, loss_att=50.761, acc=0.702, loss=55.420, backward_time=1.023, grad_norm=93.757, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.179, optim0_lr0=6.479e-05, train_time=2.712 +[gpub007:0/64] 2023-07-10 09:00:14,620 (trainer:732) INFO: 31epoch:train:7801-7900batch: iter_time=1.341e-04, forward_time=0.145, loss_ctc=68.084, loss_att=54.240, acc=0.722, loss=58.393, backward_time=1.024, grad_norm=100.937, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.179, optim0_lr0=6.478e-05, train_time=2.714 +[gpub007:0/64] 2023-07-10 09:02:30,275 (trainer:732) INFO: 31epoch:train:7901-8000batch: iter_time=1.378e-04, forward_time=0.144, loss_ctc=75.184, loss_att=61.215, acc=0.689, loss=65.405, backward_time=1.024, grad_norm=117.796, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.179, optim0_lr0=6.477e-05, train_time=2.713 +[gpub007:0/64] 2023-07-10 09:04:45,719 (trainer:732) INFO: 31epoch:train:8001-8100batch: iter_time=1.415e-04, forward_time=0.144, loss_ctc=74.700, loss_att=56.760, acc=0.718, loss=62.142, backward_time=1.024, grad_norm=102.300, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.179, optim0_lr0=6.476e-05, train_time=2.709 +[gpub007:0/64] 2023-07-10 09:07:01,307 (trainer:732) INFO: 31epoch:train:8101-8200batch: iter_time=1.358e-04, forward_time=0.145, loss_ctc=69.805, loss_att=51.574, acc=0.708, loss=57.043, backward_time=1.025, grad_norm=97.683, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.179, optim0_lr0=6.475e-05, train_time=2.712 +[gpub007:0/64] 2023-07-10 09:09:16,921 (trainer:732) INFO: 31epoch:train:8201-8300batch: iter_time=1.188e-04, forward_time=0.144, loss_ctc=80.779, loss_att=62.335, acc=0.703, loss=67.868, backward_time=1.027, grad_norm=122.167, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.179, optim0_lr0=6.474e-05, train_time=2.712 +[gpub007:0/64] 2023-07-10 09:10:01,629 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
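The combined loss reported in these trainer lines is consistent with a hybrid CTC/attention objective of the form loss = w * loss_ctc + (1 - w) * loss_att with w = 0.3: for instance, 0.3 * 80.779 + 0.7 * 62.335 = 67.868, which matches the 8201-8300batch record above. A minimal sketch of that weighting, with the 0.3 weight inferred from the logged numbers rather than read from the training config:

    # Hypothetical check of the weighting; w = 0.3 is inferred from the logged values.
    def combined_loss(loss_ctc: float, loss_att: float, w: float = 0.3) -> float:
        return w * loss_ctc + (1.0 - w) * loss_att

    # Values taken from the 31epoch:train:8201-8300batch record above.
    assert abs(combined_loss(80.779, 62.335) - 67.868) < 1e-3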
+[gpub007:0/64] 2023-07-10 09:10:19,858 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 09:10:23,505 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 09:10:23,505 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub007:0/64] 2023-07-10 09:10:23,551 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 09:15:20,591 (trainer:732) INFO: 31epoch:train:8301-8400batch: iter_time=2.196, forward_time=0.193, loss_ctc=78.114, loss_att=57.819, acc=0.701, loss=63.908, backward_time=1.038, grad_norm=115.633, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.182, optim0_lr0=6.473e-05, train_time=7.273 +[gpub007:0/64] 2023-07-10 09:17:36,929 (trainer:732) INFO: 31epoch:train:8401-8500batch: iter_time=1.171e-04, forward_time=0.145, loss_ctc=79.033, loss_att=62.958, acc=0.703, loss=67.781, backward_time=1.026, grad_norm=125.948, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.180, optim0_lr0=6.472e-05, train_time=2.727 +[gpub007:0/64] 2023-07-10 09:19:53,048 (trainer:732) INFO: 31epoch:train:8501-8600batch: iter_time=1.162e-04, forward_time=0.144, loss_ctc=72.547, loss_att=54.310, acc=0.695, loss=59.781, backward_time=1.027, grad_norm=104.981, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.180, optim0_lr0=6.471e-05, train_time=2.722 +[gpub007:0/64] 2023-07-10 09:22:08,655 (trainer:732) INFO: 31epoch:train:8601-8700batch: iter_time=1.234e-04, forward_time=0.143, loss_ctc=66.497, loss_att=52.702, acc=0.720, loss=56.841, backward_time=1.024, grad_norm=113.915, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.180, optim0_lr0=6.470e-05, train_time=2.712 +[gpub007:0/64] 2023-07-10 09:24:24,118 (trainer:732) INFO: 31epoch:train:8701-8800batch: iter_time=1.177e-04, forward_time=0.143, loss_ctc=72.030, loss_att=56.814, acc=0.718, loss=61.379, backward_time=1.024, grad_norm=103.505, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.180, optim0_lr0=6.468e-05, train_time=2.709 +[gpub007:0/64] 2023-07-10 09:26:39,551 (trainer:732) INFO: 31epoch:train:8801-8900batch: iter_time=1.315e-04, forward_time=0.143, loss_ctc=73.158, loss_att=54.480, acc=0.717, loss=60.083, backward_time=1.023, grad_norm=123.351, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.179, optim0_lr0=6.467e-05, train_time=2.708 +[gpub007:0/64] 2023-07-10 09:28:55,177 (trainer:732) INFO: 31epoch:train:8901-9000batch: iter_time=1.291e-04, forward_time=0.143, loss_ctc=72.336, loss_att=60.053, acc=0.698, loss=63.738, backward_time=1.024, grad_norm=113.711, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.179, optim0_lr0=6.466e-05, train_time=2.712 +[gpub007:0/64] 2023-07-10 09:31:10,632 (trainer:732) INFO: 31epoch:train:9001-9100batch: iter_time=1.258e-04, forward_time=0.144, loss_ctc=75.572, loss_att=57.949, acc=0.714, loss=63.236, backward_time=1.023, grad_norm=120.305, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.180, 
optim0_lr0=6.465e-05, train_time=2.709 +[gpub007:0/64] 2023-07-10 09:32:41,587 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub007:0/64] 2023-07-10 09:32:59,831 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 09:33:03,249 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 09:33:03,249 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub007:0/64] 2023-07-10 09:33:03,256 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 09:37:30,226 (trainer:732) INFO: 31epoch:train:9101-9200batch: iter_time=1.257, forward_time=0.144, loss_ctc=80.434, loss_att=59.519, acc=0.706, loss=65.793, backward_time=1.033, grad_norm=119.357, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.180, optim0_lr0=6.464e-05, train_time=7.592 +[gpub007:0/64] 2023-07-10 09:39:47,466 (trainer:732) INFO: 31epoch:train:9201-9300batch: iter_time=1.592e-04, forward_time=0.144, loss_ctc=82.936, loss_att=65.707, acc=0.686, loss=70.875, backward_time=1.027, grad_norm=109.290, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.180, optim0_lr0=6.463e-05, train_time=2.745 +[gpub007:0/64] 2023-07-10 09:42:02,890 (trainer:732) INFO: 31epoch:train:9301-9400batch: iter_time=1.533e-04, forward_time=0.143, loss_ctc=72.261, loss_att=53.272, acc=0.683, loss=58.968, backward_time=1.023, grad_norm=132.530, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.179, optim0_lr0=6.462e-05, train_time=2.708 +[gpub007:0/64] 2023-07-10 09:44:18,793 (trainer:732) INFO: 31epoch:train:9401-9500batch: iter_time=1.449e-04, forward_time=0.143, loss_ctc=68.399, loss_att=55.461, acc=0.714, loss=59.342, backward_time=1.023, grad_norm=105.996, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.180, optim0_lr0=6.461e-05, train_time=2.718 +[gpub007:0/64] 2023-07-10 09:46:34,261 (trainer:732) INFO: 31epoch:train:9501-9600batch: iter_time=1.459e-04, forward_time=0.144, loss_ctc=67.609, loss_att=52.302, acc=0.721, loss=56.894, backward_time=1.024, grad_norm=99.282, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.179, optim0_lr0=6.460e-05, train_time=2.709 +[gpub007:0/64] 2023-07-10 09:48:49,605 (trainer:732) INFO: 31epoch:train:9601-9700batch: iter_time=1.457e-04, forward_time=0.144, loss_ctc=74.109, loss_att=57.686, acc=0.702, loss=62.613, backward_time=1.023, grad_norm=106.725, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.179, optim0_lr0=6.459e-05, train_time=2.707 +[gpub007:0/64] 2023-07-10 09:51:07,550 (trainer:732) INFO: 31epoch:train:9701-9800batch: iter_time=1.520e-04, forward_time=0.144, loss_ctc=71.197, loss_att=55.314, acc=0.709, loss=60.079, backward_time=1.024, grad_norm=104.266, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.179, optim0_lr0=6.458e-05, train_time=2.759 +[gpub007:0/64] 2023-07-10 09:53:35,541 (trainer:732) INFO: 31epoch:train:9801-9900batch: iter_time=1.495e-04, forward_time=0.144, loss_ctc=70.200, 
loss_att=53.040, acc=0.709, loss=58.188, backward_time=1.043, grad_norm=91.119, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.180, optim0_lr0=6.457e-05, train_time=2.960 +[gpub007:0/64] 2023-07-10 09:55:51,048 (trainer:732) INFO: 31epoch:train:9901-10000batch: iter_time=1.194e-04, forward_time=0.143, loss_ctc=84.235, loss_att=62.484, acc=0.700, loss=69.010, backward_time=1.024, grad_norm=125.985, clip=100.000, loss_scale=1.238e+27, optim_step_time=0.180, optim0_lr0=6.455e-05, train_time=2.710 +[gpub007:0/64] 2023-07-10 10:09:21,822 (trainer:338) INFO: 31epoch results: [train] iter_time=0.188, forward_time=0.147, loss_ctc=74.729, loss_att=57.404, acc=0.706, loss=62.601, backward_time=1.028, grad_norm=110.689, clip=100.000, loss_scale=8.047e+26, optim_step_time=0.180, optim0_lr0=6.510e-05, train_time=3.369, time=4 hours, 40 minutes and 55.22 seconds, total_count=280000, gpu_max_cached_mem_GB=33.922, [valid] loss_ctc=46.086, cer_ctc=0.262, loss_att=39.649, acc=0.676, cer=0.383, wer=0.989, loss=41.580, time=7 minutes and 17.18 seconds, total_count=28842, gpu_max_cached_mem_GB=37.217, [att_plot] time=6 minutes and 2.16 seconds, total_count=0, gpu_max_cached_mem_GB=37.217 +[gpub007:0/64] 2023-07-10 10:09:37,640 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpub007:0/64] 2023-07-10 10:09:37,785 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/16epoch.pth, exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/26epoch.pth +[gpub007:0/64] 2023-07-10 10:09:37,785 (trainer:272) INFO: 32/50epoch started. Estimated time to finish: 3 days, 21 hours and 15 minutes +[gpub007:0/64] 2023-07-10 10:09:37,790 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
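Across this section the loss_scale column climbs in doublings, 6.190e+26 -> 1.238e+27 -> 2.476e+27 and onward, and each value is a power of two (6.190e+26 is 2^89, 1.238e+27 is 2^90). That pattern is characteristic of dynamic loss scaling in mixed-precision training, where the scale is doubled after a run of overflow-free steps and backed off on inf/nan gradients. A minimal sketch of one such step, assuming PyTorch's torch.cuda.amp.GradScaler with its default growth_factor=2.0, not ESPnet's exact trainer loop:

    # Sketch of a dynamically scaled mixed-precision step (assumes a CUDA device).
    import torch

    model = torch.nn.Linear(4, 2).cuda()                   # toy stand-in for the S2T model
    optimizer = torch.optim.Adam(model.parameters(), lr=2.5e-4)
    scaler = torch.cuda.amp.GradScaler(growth_factor=2.0)  # doubles the scale when stable

    x, y = torch.randn(8, 4).cuda(), torch.randn(8, 2).cuda()
    optimizer.zero_grad()
    with torch.cuda.amp.autocast():
        loss = torch.nn.functional.mse_loss(model(x), y)
    scaler.scale(loss).backward()   # backward runs on loss * loss_scale
    scaler.step(optimizer)          # unscales gradients; skips the step on inf/nan
    scaler.update()                 # grows or backs off the scale
    print(scaler.get_scale())       # the analogue of the loss_scale column above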
+[gpub007:0/64] 2023-07-10 10:09:55,388 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 10:09:58,777 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 10:09:58,777 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub007:0/64] 2023-07-10 10:09:58,783 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 10:15:01,640 (trainer:732) INFO: 32epoch:train:1-100batch: iter_time=1.814, forward_time=0.180, loss_ctc=82.775, loss_att=62.427, acc=0.694, loss=68.532, backward_time=1.042, grad_norm=129.186, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.184, optim0_lr0=6.454e-05, train_time=6.476 +[gpub007:0/64] 2023-07-10 10:17:18,393 (trainer:732) INFO: 32epoch:train:101-200batch: iter_time=1.226e-04, forward_time=0.146, loss_ctc=70.225, loss_att=55.701, acc=0.698, loss=60.058, backward_time=1.031, grad_norm=103.046, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.182, optim0_lr0=6.453e-05, train_time=2.735 +[gpub007:0/64] 2023-07-10 10:19:36,050 (trainer:732) INFO: 32epoch:train:201-300batch: iter_time=1.315e-04, forward_time=0.145, loss_ctc=69.830, loss_att=52.953, acc=0.705, loss=58.017, backward_time=1.035, grad_norm=112.692, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.182, optim0_lr0=6.452e-05, train_time=2.753 +[gpub007:0/64] 2023-07-10 10:21:55,852 (trainer:732) INFO: 32epoch:train:301-400batch: iter_time=1.219e-04, forward_time=0.146, loss_ctc=69.046, loss_att=51.545, acc=0.710, loss=56.796, backward_time=1.032, grad_norm=115.326, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.182, optim0_lr0=6.451e-05, train_time=2.796 +[gpub007:0/64] 2023-07-10 10:24:15,339 (trainer:732) INFO: 32epoch:train:401-500batch: iter_time=1.285e-04, forward_time=0.147, loss_ctc=72.322, loss_att=57.455, acc=0.704, loss=61.915, backward_time=1.034, grad_norm=102.002, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.182, optim0_lr0=6.450e-05, train_time=2.790 +[gpub007:0/64] 2023-07-10 10:26:34,466 (trainer:732) INFO: 32epoch:train:501-600batch: iter_time=1.298e-04, forward_time=0.144, loss_ctc=75.248, loss_att=53.922, acc=0.701, loss=60.320, backward_time=1.031, grad_norm=111.890, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.182, optim0_lr0=6.449e-05, train_time=2.782 +[gpub007:0/64] 2023-07-10 10:28:52,900 (trainer:732) INFO: 32epoch:train:601-700batch: iter_time=1.266e-04, forward_time=0.145, loss_ctc=59.986, loss_att=45.714, acc=0.701, loss=49.995, backward_time=1.029, grad_norm=97.759, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.182, optim0_lr0=6.448e-05, train_time=2.768 +[gpub007:0/64] 2023-07-10 10:31:08,940 (trainer:732) INFO: 32epoch:train:701-800batch: iter_time=1.272e-04, forward_time=0.146, loss_ctc=69.317, loss_att=53.875, acc=0.699, loss=58.507, backward_time=1.030, grad_norm=114.017, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.182, optim0_lr0=6.447e-05, 
train_time=2.721 +[gpub007:0/64] 2023-07-10 10:32:04,249 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub007:0/64] 2023-07-10 10:32:21,828 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 10:32:25,150 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 10:32:25,150 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub007:0/64] 2023-07-10 10:32:25,157 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 10:36:36,626 (trainer:732) INFO: 32epoch:train:801-900batch: iter_time=1.817, forward_time=0.170, loss_ctc=79.801, loss_att=65.876, acc=0.693, loss=70.053, backward_time=1.042, grad_norm=145.894, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.184, optim0_lr0=6.446e-05, train_time=6.553 +[gpub007:0/64] 2023-07-10 10:38:53,804 (trainer:732) INFO: 32epoch:train:901-1000batch: iter_time=1.214e-04, forward_time=0.148, loss_ctc=73.607, loss_att=56.551, acc=0.700, loss=61.668, backward_time=1.032, grad_norm=114.270, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.182, optim0_lr0=6.445e-05, train_time=2.744 +[gpub007:0/64] 2023-07-10 10:41:09,840 (trainer:732) INFO: 32epoch:train:1001-1100batch: iter_time=1.209e-04, forward_time=0.147, loss_ctc=69.558, loss_att=52.276, acc=0.710, loss=57.461, backward_time=1.030, grad_norm=110.727, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.183, optim0_lr0=6.444e-05, train_time=2.720 +[gpub007:0/64] 2023-07-10 10:43:25,512 (trainer:732) INFO: 32epoch:train:1101-1200batch: iter_time=1.275e-04, forward_time=0.146, loss_ctc=68.257, loss_att=50.284, acc=0.712, loss=55.676, backward_time=1.028, grad_norm=99.052, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.182, optim0_lr0=6.443e-05, train_time=2.713 +[gpub007:0/64] 2023-07-10 10:45:41,637 (trainer:732) INFO: 32epoch:train:1201-1300batch: iter_time=1.417e-04, forward_time=0.147, loss_ctc=73.110, loss_att=59.212, acc=0.706, loss=63.382, backward_time=1.031, grad_norm=103.008, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.183, optim0_lr0=6.442e-05, train_time=2.722 +[gpub007:0/64] 2023-07-10 10:47:57,371 (trainer:732) INFO: 32epoch:train:1301-1400batch: iter_time=1.430e-04, forward_time=0.146, loss_ctc=73.637, loss_att=52.926, acc=0.709, loss=59.139, backward_time=1.029, grad_norm=114.534, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.183, optim0_lr0=6.440e-05, train_time=2.714 +[gpub007:0/64] 2023-07-10 10:50:12,842 (trainer:732) INFO: 32epoch:train:1401-1500batch: iter_time=1.489e-04, forward_time=0.146, loss_ctc=59.377, loss_att=45.652, acc=0.704, loss=49.769, backward_time=1.026, grad_norm=93.770, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.183, optim0_lr0=6.439e-05, train_time=2.709 +[gpub007:0/64] 2023-07-10 10:52:28,905 (trainer:732) INFO: 32epoch:train:1501-1600batch: iter_time=1.378e-04, forward_time=0.148, loss_ctc=66.884, loss_att=52.542, acc=0.706, loss=56.845, 
backward_time=1.030, grad_norm=106.371, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.183, optim0_lr0=6.438e-05, train_time=2.721 +[gpub007:0/64] 2023-07-10 10:54:07,460 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub007:0/64] 2023-07-10 10:54:25,438 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 10:54:28,896 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 10:54:28,896 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub007:0/64] 2023-07-10 10:54:28,902 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 10:57:20,848 (trainer:732) INFO: 32epoch:train:1601-1700batch: iter_time=1.428, forward_time=0.147, loss_ctc=77.393, loss_att=63.046, acc=0.702, loss=67.350, backward_time=1.042, grad_norm=142.975, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.183, optim0_lr0=6.437e-05, train_time=5.839 +[gpub007:0/64] 2023-07-10 10:59:37,739 (trainer:732) INFO: 32epoch:train:1701-1800batch: iter_time=1.210e-04, forward_time=0.147, loss_ctc=78.024, loss_att=60.530, acc=0.692, loss=65.778, backward_time=1.033, grad_norm=110.661, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.183, optim0_lr0=6.436e-05, train_time=2.738 +[gpub007:0/64] 2023-07-10 11:01:53,900 (trainer:732) INFO: 32epoch:train:1801-1900batch: iter_time=1.239e-04, forward_time=0.148, loss_ctc=71.371, loss_att=54.012, acc=0.713, loss=59.220, backward_time=1.030, grad_norm=100.104, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.183, optim0_lr0=6.435e-05, train_time=2.723 +[gpub007:0/64] 2023-07-10 11:04:09,992 (trainer:732) INFO: 32epoch:train:1901-2000batch: iter_time=1.372e-04, forward_time=0.148, loss_ctc=67.395, loss_att=47.049, acc=0.715, loss=53.153, backward_time=1.029, grad_norm=104.062, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.182, optim0_lr0=6.434e-05, train_time=2.722 +[gpub007:0/64] 2023-07-10 11:06:26,406 (trainer:732) INFO: 32epoch:train:2001-2100batch: iter_time=1.225e-04, forward_time=0.149, loss_ctc=69.227, loss_att=55.455, acc=0.705, loss=59.586, backward_time=1.033, grad_norm=113.900, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.183, optim0_lr0=6.433e-05, train_time=2.728 +[gpub007:0/64] 2023-07-10 11:08:42,299 (trainer:732) INFO: 32epoch:train:2101-2200batch: iter_time=1.217e-04, forward_time=0.147, loss_ctc=74.120, loss_att=54.366, acc=0.708, loss=60.292, backward_time=1.029, grad_norm=106.641, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.182, optim0_lr0=6.432e-05, train_time=2.718 +[gpub007:0/64] 2023-07-10 11:10:58,128 (trainer:732) INFO: 32epoch:train:2201-2300batch: iter_time=1.129e-04, forward_time=0.146, loss_ctc=63.109, loss_att=45.315, acc=0.717, loss=50.653, backward_time=1.027, grad_norm=99.928, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.182, optim0_lr0=6.431e-05, train_time=2.716 +[gpub007:0/64] 2023-07-10 11:13:14,254 (trainer:732) INFO: 
32epoch:train:2301-2400batch: iter_time=1.259e-04, forward_time=0.146, loss_ctc=64.608, loss_att=49.717, acc=0.706, loss=54.185, backward_time=1.030, grad_norm=105.393, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.182, optim0_lr0=6.430e-05, train_time=2.722 +[gpub007:0/64] 2023-07-10 11:15:30,280 (trainer:732) INFO: 32epoch:train:2401-2500batch: iter_time=1.120e-04, forward_time=0.145, loss_ctc=75.807, loss_att=62.505, acc=0.697, loss=66.495, backward_time=1.029, grad_norm=115.518, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.182, optim0_lr0=6.429e-05, train_time=2.720 +[gpub007:0/64] 2023-07-10 11:15:32,738 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub007:0/64] 2023-07-10 11:15:50,744 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 11:15:54,388 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 11:15:54,388 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub007:0/64] 2023-07-10 11:15:54,395 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 11:21:21,406 (trainer:732) INFO: 32epoch:train:2501-2600batch: iter_time=1.212, forward_time=0.146, loss_ctc=77.847, loss_att=58.631, acc=0.703, loss=64.396, backward_time=1.043, grad_norm=128.562, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.182, optim0_lr0=6.428e-05, train_time=7.022 +[gpub007:0/64] 2023-07-10 11:23:38,061 (trainer:732) INFO: 32epoch:train:2601-2700batch: iter_time=1.351e-04, forward_time=0.147, loss_ctc=72.761, loss_att=54.966, acc=0.706, loss=60.305, backward_time=1.030, grad_norm=118.684, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.182, optim0_lr0=6.427e-05, train_time=2.733 +[gpub007:0/64] 2023-07-10 11:25:53,614 (trainer:732) INFO: 32epoch:train:2701-2800batch: iter_time=1.263e-04, forward_time=0.145, loss_ctc=69.898, loss_att=52.190, acc=0.709, loss=57.503, backward_time=1.027, grad_norm=111.867, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.183, optim0_lr0=6.426e-05, train_time=2.711 +[gpub007:0/64] 2023-07-10 11:28:09,312 (trainer:732) INFO: 32epoch:train:2801-2900batch: iter_time=1.317e-04, forward_time=0.146, loss_ctc=68.982, loss_att=50.475, acc=0.714, loss=56.027, backward_time=1.027, grad_norm=97.441, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.183, optim0_lr0=6.424e-05, train_time=2.714 +[gpub007:0/64] 2023-07-10 11:30:25,389 (trainer:732) INFO: 32epoch:train:2901-3000batch: iter_time=1.381e-04, forward_time=0.147, loss_ctc=71.362, loss_att=57.252, acc=0.708, loss=61.485, backward_time=1.029, grad_norm=125.606, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.182, optim0_lr0=6.423e-05, train_time=2.721 +[gpub007:0/64] 2023-07-10 11:32:41,218 (trainer:732) INFO: 32epoch:train:3001-3100batch: iter_time=1.400e-04, forward_time=0.146, loss_ctc=73.432, loss_att=52.328, acc=0.709, loss=58.659, backward_time=1.028, grad_norm=109.991, clip=100.000, 
loss_scale=2.476e+27, optim_step_time=0.182, optim0_lr0=6.422e-05, train_time=2.716 +[gpub007:0/64] 2023-07-10 11:34:56,587 (trainer:732) INFO: 32epoch:train:3101-3200batch: iter_time=1.361e-04, forward_time=0.146, loss_ctc=59.109, loss_att=45.449, acc=0.707, loss=49.547, backward_time=1.024, grad_norm=98.481, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.182, optim0_lr0=6.421e-05, train_time=2.707 +[gpub007:0/64] 2023-07-10 11:37:42,503 (trainer:732) INFO: 32epoch:train:3201-3300batch: iter_time=0.002, forward_time=0.155, loss_ctc=69.064, loss_att=52.243, acc=0.707, loss=57.290, backward_time=1.098, grad_norm=102.694, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.184, optim0_lr0=6.420e-05, train_time=3.318 +[gpub007:0/64] 2023-07-10 11:38:35,709 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub007:0/64] 2023-07-10 11:38:53,533 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 11:38:56,971 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 11:38:56,971 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub007:0/64] 2023-07-10 11:38:57,040 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 11:43:38,389 (trainer:732) INFO: 32epoch:train:3301-3400batch: iter_time=1.723, forward_time=0.161, loss_ctc=72.328, loss_att=59.143, acc=0.711, loss=63.098, backward_time=1.048, grad_norm=114.283, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.183, optim0_lr0=6.419e-05, train_time=7.117 +[gpub007:0/64] 2023-07-10 11:45:55,352 (trainer:732) INFO: 32epoch:train:3401-3500batch: iter_time=1.093e-04, forward_time=0.148, loss_ctc=76.344, loss_att=61.000, acc=0.693, loss=65.603, backward_time=1.032, grad_norm=126.536, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.183, optim0_lr0=6.418e-05, train_time=2.739 +[gpub007:0/64] 2023-07-10 11:48:11,190 (trainer:732) INFO: 32epoch:train:3501-3600batch: iter_time=1.021e-04, forward_time=0.146, loss_ctc=69.903, loss_att=53.202, acc=0.714, loss=58.212, backward_time=1.028, grad_norm=110.976, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.183, optim0_lr0=6.417e-05, train_time=2.717 +[gpub007:0/64] 2023-07-10 11:50:26,933 (trainer:732) INFO: 32epoch:train:3601-3700batch: iter_time=1.107e-04, forward_time=0.146, loss_ctc=66.324, loss_att=46.756, acc=0.720, loss=52.626, backward_time=1.028, grad_norm=97.591, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.183, optim0_lr0=6.416e-05, train_time=2.715 +[gpub007:0/64] 2023-07-10 11:52:43,212 (trainer:732) INFO: 32epoch:train:3701-3800batch: iter_time=1.318e-04, forward_time=0.147, loss_ctc=70.566, loss_att=55.587, acc=0.712, loss=60.081, backward_time=1.031, grad_norm=103.444, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.183, optim0_lr0=6.415e-05, train_time=2.725 +[gpub007:0/64] 2023-07-10 11:54:59,415 (trainer:732) INFO: 32epoch:train:3801-3900batch: iter_time=1.432e-04, 
forward_time=0.148, loss_ctc=71.701, loss_att=53.401, acc=0.713, loss=58.891, backward_time=1.029, grad_norm=132.687, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.182, optim0_lr0=6.414e-05, train_time=2.724 +[gpub007:0/64] 2023-07-10 11:57:15,042 (trainer:732) INFO: 32epoch:train:3901-4000batch: iter_time=1.482e-04, forward_time=0.146, loss_ctc=62.956, loss_att=46.177, acc=0.713, loss=51.211, backward_time=1.027, grad_norm=131.080, clip=100.000, loss_scale=2.476e+27, optim_step_time=0.182, optim0_lr0=6.413e-05, train_time=2.712 +[gpub007:0/64] 2023-07-10 11:59:30,462 (trainer:732) INFO: 32epoch:train:4001-4100batch: iter_time=1.452e-04, forward_time=0.145, loss_ctc=65.052, loss_att=51.279, acc=0.705, loss=55.411, backward_time=1.025, grad_norm=121.887, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.412e-05, train_time=2.708 +[gpub007:0/64] 2023-07-10 12:01:02,920 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub007:0/64] 2023-07-10 12:01:21,167 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 12:01:24,596 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 12:01:24,596 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub007:0/64] 2023-07-10 12:01:24,603 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 12:06:08,873 (trainer:732) INFO: 32epoch:train:4101-4200batch: iter_time=1.328, forward_time=0.146, loss_ctc=73.547, loss_att=59.674, acc=0.699, loss=63.836, backward_time=1.039, grad_norm=122.563, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.183, optim0_lr0=6.411e-05, train_time=7.968 +[gpub007:0/64] 2023-07-10 12:08:25,367 (trainer:732) INFO: 32epoch:train:4201-4300batch: iter_time=1.381e-04, forward_time=0.146, loss_ctc=76.839, loss_att=57.023, acc=0.705, loss=62.968, backward_time=1.031, grad_norm=125.391, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.410e-05, train_time=2.730 +[gpub007:0/64] 2023-07-10 12:10:41,236 (trainer:732) INFO: 32epoch:train:4301-4400batch: iter_time=1.364e-04, forward_time=0.144, loss_ctc=73.277, loss_att=58.712, acc=0.699, loss=63.081, backward_time=1.029, grad_norm=115.620, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.409e-05, train_time=2.717 +[gpub007:0/64] 2023-07-10 12:12:57,019 (trainer:732) INFO: 32epoch:train:4401-4500batch: iter_time=1.328e-04, forward_time=0.145, loss_ctc=67.384, loss_att=46.579, acc=0.720, loss=52.821, backward_time=1.027, grad_norm=108.017, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.408e-05, train_time=2.715 +[gpub007:0/64] 2023-07-10 12:15:12,654 (trainer:732) INFO: 32epoch:train:4501-4600batch: iter_time=1.333e-04, forward_time=0.145, loss_ctc=68.630, loss_att=53.059, acc=0.714, loss=57.730, backward_time=1.026, grad_norm=107.417, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.407e-05, 
train_time=2.712 +[gpub007:0/64] 2023-07-10 12:17:28,644 (trainer:732) INFO: 32epoch:train:4601-4700batch: iter_time=1.533e-04, forward_time=0.146, loss_ctc=73.425, loss_att=58.843, acc=0.699, loss=63.218, backward_time=1.029, grad_norm=116.945, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.405e-05, train_time=2.720 +[gpub007:0/64] 2023-07-10 12:19:44,101 (trainer:732) INFO: 32epoch:train:4701-4800batch: iter_time=1.360e-04, forward_time=0.145, loss_ctc=64.127, loss_att=46.817, acc=0.711, loss=52.010, backward_time=1.025, grad_norm=111.013, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.404e-05, train_time=2.709 +[gpub007:0/64] 2023-07-10 12:21:59,491 (trainer:732) INFO: 32epoch:train:4801-4900batch: iter_time=1.368e-04, forward_time=0.144, loss_ctc=60.258, loss_att=46.765, acc=0.703, loss=50.813, backward_time=1.026, grad_norm=106.045, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.403e-05, train_time=2.708 +[gpub007:0/64] 2023-07-10 12:24:15,527 (trainer:732) INFO: 32epoch:train:4901-5000batch: iter_time=1.372e-04, forward_time=0.146, loss_ctc=70.694, loss_att=60.402, acc=0.692, loss=63.489, backward_time=1.029, grad_norm=96.616, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.402e-05, train_time=2.721 +[gpub007:0/64] 2023-07-10 12:24:16,949 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub007:0/64] 2023-07-10 12:24:35,286 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 12:24:38,751 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 12:24:38,751 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub007:0/64] 2023-07-10 12:24:38,757 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 12:31:22,822 (trainer:732) INFO: 32epoch:train:5001-5100batch: iter_time=1.206, forward_time=0.146, loss_ctc=78.031, loss_att=58.243, acc=0.700, loss=64.179, backward_time=1.046, grad_norm=118.668, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.183, optim0_lr0=6.401e-05, train_time=8.546 +[gpub007:0/64] 2023-07-10 12:33:39,075 (trainer:732) INFO: 32epoch:train:5101-5200batch: iter_time=1.075e-04, forward_time=0.145, loss_ctc=70.920, loss_att=54.269, acc=0.702, loss=59.264, backward_time=1.030, grad_norm=98.155, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.400e-05, train_time=2.725 +[gpub007:0/64] 2023-07-10 12:35:55,015 (trainer:732) INFO: 32epoch:train:5201-5300batch: iter_time=1.238e-04, forward_time=0.146, loss_ctc=69.300, loss_att=51.324, acc=0.714, loss=56.717, backward_time=1.028, grad_norm=100.654, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.399e-05, train_time=2.719 +[gpub007:0/64] 2023-07-10 12:38:10,958 (trainer:732) INFO: 32epoch:train:5301-5400batch: iter_time=1.216e-04, forward_time=0.145, loss_ctc=67.564, loss_att=49.426, acc=0.714, 
loss=54.867, backward_time=1.027, grad_norm=110.512, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.398e-05, train_time=2.719
+[gpub007:0/64] 2023-07-10 12:40:27,203 (trainer:732) INFO: 32epoch:train:5401-5500batch: iter_time=1.271e-04, forward_time=0.145, loss_ctc=71.342, loss_att=58.549, acc=0.706, loss=62.387, backward_time=1.029, grad_norm=106.432, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.397e-05, train_time=2.725
+[gpub007:0/64] 2023-07-10 12:42:42,776 (trainer:732) INFO: 32epoch:train:5501-5600batch: iter_time=1.331e-04, forward_time=0.145, loss_ctc=74.444, loss_att=52.461, acc=0.705, loss=59.055, backward_time=1.026, grad_norm=109.611, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.396e-05, train_time=2.711
+[gpub007:0/64] 2023-07-10 12:44:58,577 (trainer:732) INFO: 32epoch:train:5601-5700batch: iter_time=1.232e-04, forward_time=0.146, loss_ctc=58.589, loss_att=43.738, acc=0.712, loss=48.193, backward_time=1.027, grad_norm=92.158, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.395e-05, train_time=2.716
+[gpub007:0/64] 2023-07-10 12:47:13,794 (trainer:732) INFO: 32epoch:train:5701-5800batch: iter_time=1.385e-04, forward_time=0.143, loss_ctc=69.496, loss_att=52.929, acc=0.699, loss=57.899, backward_time=1.024, grad_norm=111.627, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.394e-05, train_time=2.704
+[gpub007:0/64] 2023-07-10 12:48:02,338 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub007:0/64] 2023-07-10 12:48:20,314 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-10 12:48:23,726 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-10 12:48:23,726 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub007:0/64] 2023-07-10 12:48:23,732 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-10 12:53:10,280 (trainer:732) INFO: 32epoch:train:5801-5900batch: iter_time=1.251, forward_time=0.145, loss_ctc=75.374, loss_att=62.102, acc=0.695, loss=66.084, backward_time=1.042, grad_norm=129.946, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.393e-05, train_time=7.130
+[gpub007:0/64] 2023-07-10 12:55:26,949 (trainer:732) INFO: 32epoch:train:5901-6000batch: iter_time=1.207e-04, forward_time=0.145, loss_ctc=71.717, loss_att=54.986, acc=0.703, loss=60.005, backward_time=1.031, grad_norm=113.340, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.392e-05, train_time=2.733
+[gpub007:0/64] 2023-07-10 12:57:42,683 (trainer:732) INFO: 32epoch:train:6001-6100batch: iter_time=1.207e-04, forward_time=0.145, loss_ctc=69.446, loss_att=51.857, acc=0.709, loss=57.133, backward_time=1.028, grad_norm=107.204, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.183, optim0_lr0=6.391e-05, train_time=2.714
+[gpub007:0/64] 2023-07-10 12:59:58,269 (trainer:732) INFO: 32epoch:train:6101-6200batch: iter_time=1.363e-04, forward_time=0.145, loss_ctc=66.951, loss_att=48.706, acc=0.715, loss=54.179, backward_time=1.027, grad_norm=103.728, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.183, optim0_lr0=6.390e-05, train_time=2.711
+[gpub007:0/64] 2023-07-10 13:02:14,191 (trainer:732) INFO: 32epoch:train:6201-6300batch: iter_time=1.396e-04, forward_time=0.146, loss_ctc=71.430, loss_att=59.193, acc=0.709, loss=62.864, backward_time=1.029, grad_norm=102.881, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.389e-05, train_time=2.718
+[gpub007:0/64] 2023-07-10 13:04:29,947 (trainer:732) INFO: 32epoch:train:6301-6400batch: iter_time=1.312e-04, forward_time=0.145, loss_ctc=72.094, loss_att=52.015, acc=0.704, loss=58.039, backward_time=1.027, grad_norm=119.327, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.183, optim0_lr0=6.388e-05, train_time=2.715
+[gpub007:0/64] 2023-07-10 13:06:45,375 (trainer:732) INFO: 32epoch:train:6401-6500batch: iter_time=1.612e-04, forward_time=0.146, loss_ctc=59.809, loss_att=45.554, acc=0.702, loss=49.831, backward_time=1.027, grad_norm=100.219, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.183, optim0_lr0=6.387e-05, train_time=2.708
+[gpub007:0/64] 2023-07-10 13:09:01,119 (trainer:732) INFO: 32epoch:train:6501-6600batch: iter_time=1.443e-04, forward_time=0.146, loss_ctc=66.079, loss_att=52.255, acc=0.696, loss=56.402, backward_time=1.025, grad_norm=113.589, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.183, optim0_lr0=6.386e-05, train_time=2.715
+[gpub007:0/64] 2023-07-10 13:10:33,347 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub007:0/64] 2023-07-10 13:10:51,695 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-10 13:10:55,130 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-10 13:10:55,130 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpub007:0/64] 2023-07-10 13:10:55,136 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-10 13:14:27,555 (trainer:732) INFO: 32epoch:train:6601-6700batch: iter_time=1.221, forward_time=0.147, loss_ctc=75.588, loss_att=61.507, acc=0.698, loss=65.731, backward_time=1.044, grad_norm=117.004, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.183, optim0_lr0=6.385e-05, train_time=6.528
+[gpub007:0/64] 2023-07-10 13:16:44,594 (trainer:732) INFO: 32epoch:train:6701-6800batch: iter_time=1.203e-04, forward_time=0.147, loss_ctc=76.539, loss_att=59.523, acc=0.689, loss=64.628, backward_time=1.032, grad_norm=108.854, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.384e-05, train_time=2.741
+[gpub007:0/64] 2023-07-10 13:19:00,544 (trainer:732) INFO: 32epoch:train:6801-6900batch: iter_time=1.254e-04, forward_time=0.148, loss_ctc=69.941, loss_att=54.051, acc=0.712, loss=58.818, backward_time=1.030, grad_norm=105.180, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.382e-05, train_time=2.719
+[gpub007:0/64] 2023-07-10 13:21:16,238 (trainer:732) INFO: 32epoch:train:6901-7000batch: iter_time=1.211e-04, forward_time=0.147, loss_ctc=66.098, loss_att=47.072, acc=0.717, loss=52.780, backward_time=1.028, grad_norm=117.263, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.381e-05, train_time=2.714
+[gpub007:0/64] 2023-07-10 13:23:32,091 (trainer:732) INFO: 32epoch:train:7001-7100batch: iter_time=1.335e-04, forward_time=0.147, loss_ctc=69.137, loss_att=57.624, acc=0.702, loss=61.078, backward_time=1.029, grad_norm=111.250, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.380e-05, train_time=2.717
+[gpub007:0/64] 2023-07-10 13:25:47,876 (trainer:732) INFO: 32epoch:train:7101-7200batch: iter_time=1.346e-04, forward_time=0.147, loss_ctc=72.417, loss_att=53.978, acc=0.710, loss=59.509, backward_time=1.028, grad_norm=108.501, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.379e-05, train_time=2.715
+[gpub007:0/64] 2023-07-10 13:28:03,438 (trainer:732) INFO: 32epoch:train:7201-7300batch: iter_time=1.262e-04, forward_time=0.147, loss_ctc=61.759, loss_att=45.059, acc=0.713, loss=50.069, backward_time=1.027, grad_norm=92.099, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.183, optim0_lr0=6.378e-05, train_time=2.711
+[gpub007:0/64] 2023-07-10 13:30:19,184 (trainer:732) INFO: 32epoch:train:7301-7400batch: iter_time=1.197e-04, forward_time=0.147, loss_ctc=62.772, loss_att=49.481, acc=0.702, loss=53.468, backward_time=1.028, grad_norm=103.313, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.183, optim0_lr0=6.377e-05, train_time=2.715
+[gpub007:0/64] 2023-07-10 13:32:34,883 (trainer:732) INFO: 32epoch:train:7401-7500batch: iter_time=1.205e-04, forward_time=0.147, loss_ctc=72.987, loss_att=60.960, acc=0.690, loss=64.568, backward_time=1.027, grad_norm=99.943, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.183, optim0_lr0=6.376e-05, train_time=2.714
+[gpub007:0/64] 2023-07-10 13:32:40,288 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub007:0/64] 2023-07-10 13:32:58,275 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-10 13:33:01,846 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-10 13:33:01,846 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpub007:0/64] 2023-07-10 13:33:01,852 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-10 13:38:46,434 (trainer:732) INFO: 32epoch:train:7501-7600batch: iter_time=1.307, forward_time=0.147, loss_ctc=75.299, loss_att=57.418, acc=0.701, loss=62.782, backward_time=1.041, grad_norm=137.132, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.375e-05, train_time=7.431
+[gpub007:0/64] 2023-07-10 13:41:02,707 (trainer:732) INFO: 32epoch:train:7601-7700batch: iter_time=1.142e-04, forward_time=0.146, loss_ctc=71.877, loss_att=54.967, acc=0.708, loss=60.040, backward_time=1.029, grad_norm=104.384, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.374e-05, train_time=2.725
+[gpub007:0/64] 2023-07-10 13:43:18,360 (trainer:732) INFO: 32epoch:train:7701-7800batch: iter_time=1.115e-04, forward_time=0.145, loss_ctc=67.383, loss_att=50.826, acc=0.711, loss=55.793, backward_time=1.027, grad_norm=109.530, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.373e-05, train_time=2.713
+[gpub007:0/64] 2023-07-10 13:45:33,983 (trainer:732) INFO: 32epoch:train:7801-7900batch: iter_time=1.142e-04, forward_time=0.145, loss_ctc=68.067, loss_att=49.300, acc=0.717, loss=54.930, backward_time=1.027, grad_norm=104.781, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.372e-05, train_time=2.712
+[gpub007:0/64] 2023-07-10 13:47:49,561 (trainer:732) INFO: 32epoch:train:7901-8000batch: iter_time=1.144e-04, forward_time=0.144, loss_ctc=72.664, loss_att=58.684, acc=0.706, loss=62.878, backward_time=1.026, grad_norm=118.286, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.371e-05, train_time=2.711
+[gpub007:0/64] 2023-07-10 13:50:08,812 (trainer:732) INFO: 32epoch:train:8001-8100batch: iter_time=1.234e-04, forward_time=0.146, loss_ctc=73.128, loss_att=52.237, acc=0.702, loss=58.505, backward_time=1.041, grad_norm=113.324, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.370e-05, train_time=2.785
+[gpub007:0/64] 2023-07-10 13:52:24,009 (trainer:732) INFO: 32epoch:train:8101-8200batch: iter_time=1.130e-04, forward_time=0.143, loss_ctc=58.571, loss_att=43.958, acc=0.708, loss=48.342, backward_time=1.025, grad_norm=93.754, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.369e-05, train_time=2.704
+[gpub007:0/64] 2023-07-10 13:54:53,526 (trainer:732) INFO: 32epoch:train:8201-8300batch: iter_time=1.266e-04, forward_time=0.154, loss_ctc=66.214, loss_att=51.471, acc=0.702, loss=55.894, backward_time=1.042, grad_norm=108.612, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.368e-05, train_time=2.990
+[gpub007:0/64] 2023-07-10 13:55:44,899 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub007:0/64] 2023-07-10 13:56:03,257 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-10 13:56:06,962 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-10 13:56:06,962 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpub007:0/64] 2023-07-10 13:56:06,968 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-10 14:00:18,628 (trainer:732) INFO: 32epoch:train:8301-8400batch: iter_time=1.336, forward_time=0.146, loss_ctc=71.560, loss_att=56.880, acc=0.708, loss=61.284, backward_time=1.046, grad_norm=102.825, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.367e-05, train_time=6.502
+[gpub007:0/64] 2023-07-10 14:02:35,182 (trainer:732) INFO: 32epoch:train:8401-8500batch: iter_time=1.259e-04, forward_time=0.145, loss_ctc=75.128, loss_att=59.116, acc=0.694, loss=63.920, backward_time=1.029, grad_norm=112.125, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.366e-05, train_time=2.731
+[gpub007:0/64] 2023-07-10 14:04:51,087 (trainer:732) INFO: 32epoch:train:8501-8600batch: iter_time=1.232e-04, forward_time=0.145, loss_ctc=69.914, loss_att=51.710, acc=0.714, loss=57.171, backward_time=1.028, grad_norm=108.415, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.365e-05, train_time=2.718
+[gpub007:0/64] 2023-07-10 14:07:06,663 (trainer:732) INFO: 32epoch:train:8601-8700batch: iter_time=1.201e-04, forward_time=0.146, loss_ctc=66.599, loss_att=46.353, acc=0.723, loss=52.427, backward_time=1.026, grad_norm=94.420, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.364e-05, train_time=2.711
+[gpub007:0/64] 2023-07-10 14:09:22,548 (trainer:732) INFO: 32epoch:train:8701-8800batch: iter_time=1.209e-04, forward_time=0.146, loss_ctc=69.481, loss_att=57.050, acc=0.703, loss=60.780, backward_time=1.028, grad_norm=100.663, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.363e-05, train_time=2.717
+[gpub007:0/64] 2023-07-10 14:11:38,476 (trainer:732) INFO: 32epoch:train:8801-8900batch: iter_time=1.231e-04, forward_time=0.146, loss_ctc=72.168, loss_att=53.037, acc=0.711, loss=58.776, backward_time=1.028, grad_norm=122.336, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.362e-05, train_time=2.718
+[gpub007:0/64] 2023-07-10 14:13:54,152 (trainer:732) INFO: 32epoch:train:8901-9000batch: iter_time=1.232e-04, forward_time=0.146, loss_ctc=61.344, loss_att=44.963, acc=0.711, loss=49.877, backward_time=1.026, grad_norm=95.392, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.361e-05, train_time=2.713
+[gpub007:0/64] 2023-07-10 14:16:09,760 (trainer:732) INFO: 32epoch:train:9001-9100batch: iter_time=1.245e-04, forward_time=0.145, loss_ctc=62.596, loss_att=49.119, acc=0.703, loss=53.162, backward_time=1.026, grad_norm=96.887, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.360e-05, train_time=2.712
+[gpub007:0/64] 2023-07-10 14:17:42,042 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub007:0/64] 2023-07-10 14:17:59,824 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-10 14:18:03,187 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-10 14:18:03,187 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub007:0/64] 2023-07-10 14:18:03,194 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-10 14:23:03,903 (trainer:732) INFO: 32epoch:train:9101-9200batch: iter_time=1.226, forward_time=0.145, loss_ctc=73.144, loss_att=60.697, acc=0.696, loss=64.431, backward_time=1.038, grad_norm=108.879, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.359e-05, train_time=8.283
+[gpub007:0/64] 2023-07-10 14:25:21,227 (trainer:732) INFO: 32epoch:train:9201-9300batch: iter_time=1.238e-04, forward_time=0.146, loss_ctc=74.656, loss_att=56.639, acc=0.711, loss=62.044, backward_time=1.034, grad_norm=111.073, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.358e-05, train_time=2.746
+[gpub007:0/64] 2023-07-10 14:27:37,722 (trainer:732) INFO: 32epoch:train:9301-9400batch: iter_time=1.122e-04, forward_time=0.147, loss_ctc=71.766, loss_att=57.666, acc=0.712, loss=61.896, backward_time=1.030, grad_norm=109.025, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.357e-05, train_time=2.730
+[gpub007:0/64] 2023-07-10 14:29:53,547 (trainer:732) INFO: 32epoch:train:9401-9500batch: iter_time=1.220e-04, forward_time=0.146, loss_ctc=66.631, loss_att=45.648, acc=0.727, loss=51.943, backward_time=1.027, grad_norm=96.909, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.356e-05, train_time=2.716
+[gpub007:0/64] 2023-07-10 14:32:09,285 (trainer:732) INFO: 32epoch:train:9501-9600batch: iter_time=1.242e-04, forward_time=0.146, loss_ctc=68.474, loss_att=53.078, acc=0.720, loss=57.697, backward_time=1.028, grad_norm=104.949, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.183, optim0_lr0=6.355e-05, train_time=2.715
+[gpub007:0/64] 2023-07-10 14:34:25,158 (trainer:732) INFO: 32epoch:train:9601-9700batch: iter_time=1.156e-04, forward_time=0.146, loss_ctc=72.926, loss_att=58.313, acc=0.697, loss=62.697, backward_time=1.029, grad_norm=125.061, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.354e-05, train_time=2.717
+[gpub007:0/64] 2023-07-10 14:36:40,674 (trainer:732) INFO: 32epoch:train:9701-9800batch: iter_time=1.250e-04, forward_time=0.145, loss_ctc=65.064, loss_att=46.459, acc=0.723, loss=52.040, backward_time=1.026, grad_norm=110.940, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.183, optim0_lr0=6.353e-05, train_time=2.710
+[gpub007:0/64] 2023-07-10 14:38:56,247 (trainer:732) INFO: 32epoch:train:9801-9900batch: iter_time=1.119e-04, forward_time=0.146, loss_ctc=60.098, loss_att=45.816, acc=0.712, loss=50.101, backward_time=1.027, grad_norm=113.480, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.352e-05, train_time=2.711
+[gpub007:0/64] 2023-07-10 14:41:12,363 (trainer:732) INFO: 32epoch:train:9901-10000batch: iter_time=1.388e-04, forward_time=0.146, loss_ctc=68.428, loss_att=58.130, acc=0.710, loss=61.220, backward_time=1.030, grad_norm=117.880, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.181, optim0_lr0=6.350e-05, train_time=2.722
+[gpub007:0/64] 2023-07-10 14:54:12,519 (trainer:338) INFO: 32epoch results: [train] iter_time=0.169, forward_time=0.147, loss_ctc=69.828, loss_att=53.553, acc=0.706, loss=58.435, backward_time=1.031, grad_norm=110.367, clip=100.000, loss_scale=4.952e+27, optim_step_time=0.182, optim0_lr0=6.402e-05, train_time=3.259, time=4 hours, 31 minutes and 47.4 seconds, total_count=290000, gpu_max_cached_mem_GB=37.217, [valid] loss_ctc=47.779, cer_ctc=0.273, loss_att=40.205, acc=0.660, cer=0.433, wer=1.000, loss=42.477, time=6 minutes and 52.25 seconds, total_count=29854, gpu_max_cached_mem_GB=37.217, [att_plot] time=5 minutes and 55.08 seconds, total_count=0, gpu_max_cached_mem_GB=37.217
+[gpub007:0/64] 2023-07-10 14:54:30,700 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub007:0/64] 2023-07-10 14:54:30,707 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/27epoch.pth
+[gpub007:0/64] 2023-07-10 14:54:30,708 (trainer:272) INFO: 33/50epoch started. Estimated time to finish: 3 days, 14 hours and 54 minutes
+[gpub007:0/64] 2023-07-10 14:54:31,356 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub007:0/64] 2023-07-10 14:54:48,831 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-10 14:54:52,265 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-10 14:54:52,265 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub007:0/64] 2023-07-10 14:54:52,377 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-10 15:01:51,026 (trainer:732) INFO: 33epoch:train:1-100batch: iter_time=2.982, forward_time=0.171, loss_ctc=81.309, loss_att=58.949, acc=0.687, loss=65.657, backward_time=1.042, grad_norm=140.517, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.185, optim0_lr0=6.349e-05, train_time=8.795
+[gpub007:0/64] 2023-07-10 15:04:07,486 (trainer:732) INFO: 33epoch:train:101-200batch: iter_time=1.085e-04, forward_time=0.146, loss_ctc=80.757, loss_att=58.329, acc=0.689, loss=65.057, backward_time=1.030, grad_norm=121.535, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.348e-05, train_time=2.729
+[gpub007:0/64] 2023-07-10 15:06:23,888 (trainer:732) INFO: 33epoch:train:201-300batch: iter_time=1.129e-04, forward_time=0.146, loss_ctc=70.150, loss_att=53.451, acc=0.706, loss=58.460, backward_time=1.029, grad_norm=94.366, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.347e-05, train_time=2.728
+[gpub007:0/64] 2023-07-10 15:08:45,416 (trainer:732) INFO: 33epoch:train:301-400batch: iter_time=1.228e-04, forward_time=0.145, loss_ctc=74.947, loss_att=53.595, acc=0.726, loss=60.001, backward_time=1.035, grad_norm=145.374, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.183, optim0_lr0=6.346e-05, train_time=2.830
+[gpub007:0/64] 2023-07-10 15:11:00,955 (trainer:732) INFO: 33epoch:train:401-500batch: iter_time=1.247e-04, forward_time=0.145, loss_ctc=72.597, loss_att=50.951, acc=0.715, loss=57.445, backward_time=1.027, grad_norm=162.304, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.183, optim0_lr0=6.345e-05, train_time=2.711
+[gpub007:0/64] 2023-07-10 15:13:16,840 (trainer:732) INFO: 33epoch:train:501-600batch: iter_time=1.336e-04, forward_time=0.146, loss_ctc=71.856, loss_att=56.265, acc=0.708, loss=60.942, backward_time=1.028, grad_norm=113.889, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.344e-05, train_time=2.717
+[gpub007:0/64] 2023-07-10 15:15:32,498 (trainer:732) INFO: 33epoch:train:601-700batch: iter_time=1.299e-04, forward_time=0.146, loss_ctc=71.532, loss_att=49.708, acc=0.713, loss=56.256, backward_time=1.027, grad_norm=120.484, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.183, optim0_lr0=6.343e-05, train_time=2.713
+[gpub007:0/64] 2023-07-10 15:17:49,292 (trainer:732) INFO: 33epoch:train:701-800batch: iter_time=1.353e-04, forward_time=0.146, loss_ctc=58.765, loss_att=44.584, acc=0.702, loss=48.839, backward_time=1.030, grad_norm=96.075, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.342e-05, train_time=2.736
+[gpub007:0/64] 2023-07-10 15:18:41,780 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpub007:0/64] 2023-07-10 15:18:59,429 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-10 15:19:02,787 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-10 15:19:02,787 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub007:0/64] 2023-07-10 15:19:02,793 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-10 15:24:32,340 (trainer:732) INFO: 33epoch:train:801-900batch: iter_time=1.379, forward_time=0.167, loss_ctc=77.054, loss_att=59.093, acc=0.692, loss=64.482, backward_time=1.039, grad_norm=110.763, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.341e-05, train_time=8.061
+[gpub007:0/64] 2023-07-10 15:26:52,029 (trainer:732) INFO: 33epoch:train:901-1000batch: iter_time=1.160e-04, forward_time=0.147, loss_ctc=83.596, loss_att=56.990, acc=0.690, loss=64.972, backward_time=1.033, grad_norm=141.577, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.340e-05, train_time=2.794
+[gpub007:0/64] 2023-07-10 15:29:08,446 (trainer:732) INFO: 33epoch:train:1001-1100batch: iter_time=1.392e-04, forward_time=0.148, loss_ctc=68.429, loss_att=55.307, acc=0.702, loss=59.244, backward_time=1.030, grad_norm=126.627, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.339e-05, train_time=2.728
+[gpub007:0/64] 2023-07-10 15:31:24,327 (trainer:732) INFO: 33epoch:train:1101-1200batch: iter_time=1.288e-04, forward_time=0.147, loss_ctc=73.355, loss_att=52.152, acc=0.723, loss=58.512, backward_time=1.028, grad_norm=115.131, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.338e-05, train_time=2.717
+[gpub007:0/64] 2023-07-10 15:33:39,989 (trainer:732) INFO: 33epoch:train:1201-1300batch: iter_time=1.435e-04, forward_time=0.147, loss_ctc=64.075, loss_att=43.059, acc=0.728, loss=49.364, backward_time=1.028, grad_norm=95.466, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.337e-05, train_time=2.713
+[gpub007:0/64] 2023-07-10 15:35:55,804 (trainer:732) INFO: 33epoch:train:1301-1400batch: iter_time=1.343e-04, forward_time=0.147, loss_ctc=73.302, loss_att=56.271, acc=0.716, loss=61.380, backward_time=1.028, grad_norm=119.694, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.336e-05, train_time=2.716
+[gpub007:0/64] 2023-07-10 15:38:11,410 (trainer:732) INFO: 33epoch:train:1401-1500batch: iter_time=1.321e-04, forward_time=0.147, loss_ctc=72.741, loss_att=52.143, acc=0.714, loss=58.322, backward_time=1.027, grad_norm=107.157, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.335e-05, train_time=2.712
+[gpub007:0/64] 2023-07-10 15:40:26,947 (trainer:732) INFO: 33epoch:train:1501-1600batch: iter_time=1.249e-04, forward_time=0.146, loss_ctc=60.881, loss_att=46.840, acc=0.709, loss=51.052, backward_time=1.027, grad_norm=111.445, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.334e-05, train_time=2.711
+[gpub007:0/64] 2023-07-10 15:42:00,968 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub007:0/64] 2023-07-10 15:42:19,254 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-10 15:42:22,697 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-10 15:42:22,698 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub007:0/64] 2023-07-10 15:42:22,704 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-10 15:45:39,799 (trainer:732) INFO: 33epoch:train:1601-1700batch: iter_time=1.636, forward_time=0.149, loss_ctc=70.667, loss_att=54.373, acc=0.686, loss=59.262, backward_time=1.039, grad_norm=100.782, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.183, optim0_lr0=6.333e-05, train_time=6.257
+[gpub007:0/64] 2023-07-10 15:47:56,440 (trainer:732) INFO: 33epoch:train:1701-1800batch: iter_time=1.377e-04, forward_time=0.146, loss_ctc=82.484, loss_att=57.215, acc=0.680, loss=64.796, backward_time=1.030, grad_norm=122.345, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.332e-05, train_time=2.733
+[gpub007:0/64] 2023-07-10 15:50:12,354 (trainer:732) INFO: 33epoch:train:1801-1900batch: iter_time=1.406e-04, forward_time=0.146, loss_ctc=68.883, loss_att=56.964, acc=0.689, loss=60.540, backward_time=1.028, grad_norm=114.633, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.331e-05, train_time=2.718
+[gpub007:0/64] 2023-07-10 15:52:28,082 (trainer:732) INFO: 33epoch:train:1901-2000batch: iter_time=1.456e-04, forward_time=0.147, loss_ctc=72.270, loss_att=54.885, acc=0.713, loss=60.101, backward_time=1.028, grad_norm=111.059, clip=100.000, loss_scale=9.904e+27, optim_step_time=0.182, optim0_lr0=6.330e-05, train_time=2.714
+[gpub007:0/64] 2023-07-10 15:54:43,802 (trainer:732) INFO: 33epoch:train:2001-2100batch: iter_time=1.321e-04, forward_time=0.146, loss_ctc=65.468, loss_att=43.872, acc=0.718, loss=50.351, backward_time=1.027, grad_norm=91.409, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.329e-05, train_time=2.714
+[gpub007:0/64] 2023-07-10 15:56:59,983 (trainer:732) INFO: 33epoch:train:2101-2200batch: iter_time=1.069e-04, forward_time=0.147, loss_ctc=70.783, loss_att=54.494, acc=0.716, loss=59.380, backward_time=1.029, grad_norm=105.521, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.328e-05, train_time=2.723
+[gpub007:0/64] 2023-07-10 15:59:15,922 (trainer:732) INFO: 33epoch:train:2201-2300batch: iter_time=1.109e-04, forward_time=0.146, loss_ctc=73.796, loss_att=53.110, acc=0.708, loss=59.316, backward_time=1.028, grad_norm=106.896, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.327e-05, train_time=2.719
+[gpub007:0/64] 2023-07-10 16:01:31,242 (trainer:732) INFO: 33epoch:train:2301-2400batch: iter_time=1.127e-04, forward_time=0.144, loss_ctc=62.133, loss_att=47.015, acc=0.702, loss=51.550, backward_time=1.024, grad_norm=94.158, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.326e-05, train_time=2.706
+[gpub007:0/64] 2023-07-10 16:03:46,902 (trainer:732) INFO: 33epoch:train:2401-2500batch: iter_time=1.129e-04, forward_time=0.145, loss_ctc=66.650, loss_att=49.489, acc=0.698, loss=54.637, backward_time=1.027, grad_norm=126.734, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.325e-05, train_time=2.713
+[gpub007:0/64] 2023-07-10 16:03:48,239 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub007:0/64] 2023-07-10 16:04:06,299 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-10 16:04:09,786 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-10 16:04:09,786 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub007:0/64] 2023-07-10 16:04:09,792 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-10 16:09:43,696 (trainer:732) INFO: 33epoch:train:2501-2600batch: iter_time=1.210, forward_time=0.163, loss_ctc=79.428, loss_att=58.300, acc=0.691, loss=64.638, backward_time=1.042, grad_norm=124.156, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.184, optim0_lr0=6.324e-05, train_time=7.136
+[gpub007:0/64] 2023-07-10 16:11:59,861 (trainer:732) INFO: 33epoch:train:2601-2700batch: iter_time=1.207e-04, forward_time=0.146, loss_ctc=75.844, loss_att=56.845, acc=0.689, loss=62.544, backward_time=1.029, grad_norm=132.914, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.323e-05, train_time=2.723
+[gpub007:0/64] 2023-07-10 16:14:15,858 (trainer:732) INFO: 33epoch:train:2701-2800batch: iter_time=1.160e-04, forward_time=0.147, loss_ctc=69.542, loss_att=52.712, acc=0.711, loss=57.761, backward_time=1.030, grad_norm=114.777, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.322e-05, train_time=2.720
+[gpub007:0/64] 2023-07-10 16:16:31,778 (trainer:732) INFO: 33epoch:train:2801-2900batch: iter_time=1.126e-04, forward_time=0.146, loss_ctc=73.852, loss_att=52.859, acc=0.729, loss=59.157, backward_time=1.029, grad_norm=115.952, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.321e-05, train_time=2.718
+[gpub007:0/64] 2023-07-10 16:18:47,472 (trainer:732) INFO: 33epoch:train:2901-3000batch: iter_time=1.213e-04, forward_time=0.146, loss_ctc=68.655, loss_att=48.703, acc=0.723, loss=54.688, backward_time=1.027, grad_norm=102.081, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.320e-05, train_time=2.714
+[gpub007:0/64] 2023-07-10 16:21:04,634 (trainer:732) INFO: 33epoch:train:3001-3100batch: iter_time=1.264e-04, forward_time=0.146, loss_ctc=71.203, loss_att=55.407, acc=0.714, loss=60.146, backward_time=1.028, grad_norm=99.705, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.319e-05, train_time=2.743
+[gpub007:0/64] 2023-07-10 16:23:45,327 (trainer:732) INFO: 33epoch:train:3101-3200batch: iter_time=7.377e-04, forward_time=0.195, loss_ctc=69.408, loss_att=48.348, acc=0.724, loss=54.666, backward_time=1.060, grad_norm=97.142, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.188, optim0_lr0=6.318e-05, train_time=3.214
+[gpub007:0/64] 2023-07-10 16:26:03,246 (trainer:732) INFO: 33epoch:train:3201-3300batch: iter_time=0.005, forward_time=0.152, loss_ctc=58.701, loss_att=45.362, acc=0.703, loss=49.364, backward_time=1.033, grad_norm=90.793, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.317e-05, train_time=2.758
+[gpub007:0/64] 2023-07-10 16:27:04,564 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub007:0/64] 2023-07-10 16:27:22,892 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-10 16:27:26,375 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-10 16:27:26,375 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpub007:0/64] 2023-07-10 16:27:26,381 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-10 16:34:16,021 (trainer:732) INFO: 33epoch:train:3301-3400batch: iter_time=1.970, forward_time=0.145, loss_ctc=76.741, loss_att=58.406, acc=0.688, loss=63.907, backward_time=1.041, grad_norm=116.933, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.316e-05, train_time=9.855
+[gpub007:0/64] 2023-07-10 16:36:32,136 (trainer:732) INFO: 33epoch:train:3401-3500batch: iter_time=1.223e-04, forward_time=0.146, loss_ctc=77.940, loss_att=55.696, acc=0.688, loss=62.369, backward_time=1.028, grad_norm=121.412, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.315e-05, train_time=2.722
+[gpub007:0/64] 2023-07-10 16:38:47,675 (trainer:732) INFO: 33epoch:train:3501-3600batch: iter_time=1.211e-04, forward_time=0.145, loss_ctc=68.014, loss_att=54.887, acc=0.697, loss=58.825, backward_time=1.026, grad_norm=120.682, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.314e-05, train_time=2.711
+[gpub007:0/64] 2023-07-10 16:41:03,377 (trainer:732) INFO: 33epoch:train:3601-3700batch: iter_time=1.223e-04, forward_time=0.145, loss_ctc=72.287, loss_att=54.805, acc=0.716, loss=60.049, backward_time=1.028, grad_norm=114.066, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.313e-05, train_time=2.714
+[gpub007:0/64] 2023-07-10 16:43:18,735 (trainer:732) INFO: 33epoch:train:3701-3800batch: iter_time=1.286e-04, forward_time=0.145, loss_ctc=64.499, loss_att=43.830, acc=0.721, loss=50.031, backward_time=1.026, grad_norm=113.227, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.312e-05, train_time=2.707
+[gpub007:0/64] 2023-07-10 16:45:34,412 (trainer:732) INFO: 33epoch:train:3801-3900batch: iter_time=1.277e-04, forward_time=0.145, loss_ctc=73.673, loss_att=55.965, acc=0.715, loss=61.277, backward_time=1.028, grad_norm=117.572, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.311e-05, train_time=2.713
+[gpub007:0/64] 2023-07-10 16:47:49,943 (trainer:732) INFO: 33epoch:train:3901-4000batch: iter_time=1.230e-04, forward_time=0.146, loss_ctc=71.386, loss_att=51.773, acc=0.712, loss=57.657, backward_time=1.026, grad_norm=118.908, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.310e-05, train_time=2.710
+[gpub007:0/64] 2023-07-10 16:50:05,403 (trainer:732) INFO: 33epoch:train:4001-4100batch: iter_time=1.465e-04, forward_time=0.145, loss_ctc=61.354, loss_att=45.518, acc=0.709, loss=50.269, backward_time=1.025, grad_norm=105.199, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.309e-05, train_time=2.709
+[gpub007:0/64] 2023-07-10 16:51:36,538 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub007:0/64] 2023-07-10 16:51:54,729 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-10 16:51:58,191 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-10 16:51:58,191 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpub007:0/64] 2023-07-10 16:51:58,198 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-10 16:56:17,698 (trainer:732) INFO: 33epoch:train:4101-4200batch: iter_time=1.214, forward_time=0.149, loss_ctc=70.786, loss_att=56.826, acc=0.687, loss=61.014, backward_time=1.045, grad_norm=103.303, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.308e-05, train_time=7.445
+[gpub007:0/64] 2023-07-10 16:58:35,498 (trainer:732) INFO: 33epoch:train:4201-4300batch: iter_time=1.257e-04, forward_time=0.148, loss_ctc=81.391, loss_att=56.493, acc=0.690, loss=63.963, backward_time=1.034, grad_norm=138.704, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.307e-05, train_time=2.756
+[gpub007:0/64] 2023-07-10 17:00:51,315 (trainer:732) INFO: 33epoch:train:4301-4400batch: iter_time=1.201e-04, forward_time=0.146, loss_ctc=69.151, loss_att=54.165, acc=0.692, loss=58.661, backward_time=1.028, grad_norm=108.222, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.306e-05, train_time=2.716
+[gpub007:0/64] 2023-07-10 17:03:06,996 (trainer:732) INFO: 33epoch:train:4401-4500batch: iter_time=1.135e-04, forward_time=0.146, loss_ctc=67.175, loss_att=50.140, acc=0.714, loss=55.251, backward_time=1.028, grad_norm=115.769, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.305e-05, train_time=2.713
+[gpub007:0/64] 2023-07-10 17:05:22,364 (trainer:732) INFO: 33epoch:train:4501-4600batch: iter_time=1.324e-04, forward_time=0.145, loss_ctc=69.180, loss_att=49.560, acc=0.726, loss=55.446, backward_time=1.026, grad_norm=123.860, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.304e-05, train_time=2.707
+[gpub007:0/64] 2023-07-10 17:07:38,144 (trainer:732) INFO: 33epoch:train:4601-4700batch: iter_time=1.148e-04, forward_time=0.146, loss_ctc=75.561, loss_att=57.210, acc=0.707, loss=62.715, backward_time=1.029, grad_norm=115.998, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.303e-05, train_time=2.715
+[gpub007:0/64] 2023-07-10 17:09:53,850 (trainer:732) INFO: 33epoch:train:4701-4800batch: iter_time=1.156e-04, forward_time=0.146, loss_ctc=64.681, loss_att=46.224, acc=0.717, loss=51.761, backward_time=1.029, grad_norm=104.308, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.302e-05, train_time=2.714
+[gpub007:0/64] 2023-07-10 17:12:09,364 (trainer:732) INFO: 33epoch:train:4801-4900batch: iter_time=1.150e-04, forward_time=0.146, loss_ctc=69.414, loss_att=50.143, acc=0.713, loss=55.924, backward_time=1.027, grad_norm=100.383, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.301e-05, train_time=2.710
+[gpub007:0/64] 2023-07-10 17:14:28,978 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub007:0/64] 2023-07-10 17:14:47,350 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-10 17:14:50,804 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-10 17:14:50,805 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub007:0/64] 2023-07-10 17:14:50,814 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-10 17:19:38,238 (trainer:732) INFO: 33epoch:train:4901-5000batch: iter_time=1.633, forward_time=0.175, loss_ctc=62.916, loss_att=49.597, acc=0.690, loss=53.593, backward_time=1.033, grad_norm=107.175, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.183, optim0_lr0=6.300e-05, train_time=8.977
+[gpub007:0/64] 2023-07-10 17:22:00,097 (trainer:732) INFO: 33epoch:train:5001-5100batch: iter_time=1.087e-04, forward_time=0.146, loss_ctc=78.968, loss_att=56.740, acc=0.695, loss=63.408, backward_time=1.052, grad_norm=114.389, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.299e-05, train_time=2.837
+[gpub007:0/64] 2023-07-10 17:24:22,818 (trainer:732) INFO: 33epoch:train:5101-5200batch: iter_time=1.032e-04, forward_time=0.145, loss_ctc=73.361, loss_att=56.222, acc=0.682, loss=61.364, backward_time=1.037, grad_norm=142.420, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.298e-05, train_time=2.854
+[gpub007:0/64] 2023-07-10 17:26:40,403 (trainer:732) INFO: 33epoch:train:5201-5300batch: iter_time=1.109e-04, forward_time=0.145, loss_ctc=70.413, loss_att=53.668, acc=0.705, loss=58.692, backward_time=1.030, grad_norm=106.880, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.297e-05, train_time=2.751
+[gpub007:0/64] 2023-07-10 17:28:56,798 (trainer:732) INFO: 33epoch:train:5301-5400batch: iter_time=1.264e-04, forward_time=0.146, loss_ctc=72.574, loss_att=53.666, acc=0.725, loss=59.338, backward_time=1.027, grad_norm=112.195, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.296e-05, train_time=2.728
+[gpub007:0/64] 2023-07-10 17:31:16,771 (trainer:732) INFO: 33epoch:train:5401-5500batch: iter_time=1.354e-04, forward_time=0.146, loss_ctc=68.498, loss_att=47.891, acc=0.720, loss=54.073, backward_time=1.030, grad_norm=108.496, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.295e-05, train_time=2.799
+[gpub007:0/64] 2023-07-10 17:33:33,130 (trainer:732) INFO: 33epoch:train:5501-5600batch: iter_time=1.450e-04, forward_time=0.147, loss_ctc=71.045, loss_att=55.443, acc=0.713, loss=60.123, backward_time=1.029, grad_norm=115.324, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.294e-05, train_time=2.727
+[gpub007:0/64] 2023-07-10 17:35:48,684 (trainer:732) INFO: 33epoch:train:5601-5700batch: iter_time=1.307e-04, forward_time=0.145, loss_ctc=68.551, loss_att=47.933, acc=0.717, loss=54.118, backward_time=1.027, grad_norm=110.816, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.293e-05, train_time=2.711
+[gpub007:0/64] 2023-07-10 17:38:04,039 (trainer:732) INFO: 33epoch:train:5701-5800batch: iter_time=1.287e-04, forward_time=0.144, loss_ctc=59.922, loss_att=43.721, acc=0.700, loss=48.581, backward_time=1.026, grad_norm=93.916, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.292e-05, train_time=2.707
+[gpub007:0/64] 2023-07-10 17:38:50,122 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub007:0/64] 2023-07-10 17:39:08,178 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-10 17:39:11,617 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-10 17:39:11,617 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpub007:0/64] 2023-07-10 17:39:11,623 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-10 17:44:00,936 (trainer:732) INFO: 33epoch:train:5801-5900batch: iter_time=1.385, forward_time=0.145, loss_ctc=74.081, loss_att=55.293, acc=0.693, loss=60.929, backward_time=1.044, grad_norm=102.943, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.291e-05, train_time=7.138
+[gpub007:0/64] 2023-07-10 17:46:17,528 (trainer:732) INFO: 33epoch:train:5901-6000batch: iter_time=1.259e-04, forward_time=0.145, loss_ctc=73.307, loss_att=55.392, acc=0.695, loss=60.767, backward_time=1.029, grad_norm=132.164, clip=100.000, loss_scale=1.981e+28, optim_step_time=0.182, optim0_lr0=6.290e-05, train_time=2.732
+[gpub007:0/64] 2023-07-10 17:48:33,613 (trainer:732) INFO: 33epoch:train:6001-6100batch: iter_time=1.170e-04, forward_time=0.146, loss_ctc=69.019, loss_att=52.685, acc=0.712, loss=57.585, backward_time=1.028, grad_norm=117.027, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.289e-05, train_time=2.721
+[gpub007:0/64] 2023-07-10 17:50:49,583 (trainer:732) INFO: 33epoch:train:6101-6200batch: iter_time=1.118e-04, forward_time=0.146, loss_ctc=72.669, loss_att=50.812, acc=0.733, loss=57.369, backward_time=1.029, grad_norm=111.041, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.288e-05, train_time=2.719
+[gpub007:0/64] 2023-07-10 17:53:05,443 (trainer:732) INFO: 33epoch:train:6201-6300batch: iter_time=1.272e-04, forward_time=0.146, loss_ctc=66.953, loss_att=47.202, acc=0.727, loss=53.127, backward_time=1.028, grad_norm=100.819, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.287e-05, train_time=2.717
+[gpub007:0/64] 2023-07-10 17:55:21,553 (trainer:732) INFO: 33epoch:train:6301-6400batch: iter_time=1.220e-04, forward_time=0.147, loss_ctc=70.438, loss_att=55.279, acc=0.711, loss=59.827, backward_time=1.032, grad_norm=106.207, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.286e-05, train_time=2.722
+[gpub007:0/64] 2023-07-10 17:57:37,314 (trainer:732) INFO: 33epoch:train:6401-6500batch: iter_time=1.237e-04, forward_time=0.148, loss_ctc=68.695, loss_att=48.236, acc=0.721, loss=54.373, backward_time=1.029, grad_norm=102.571, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.183, optim0_lr0=6.285e-05, train_time=2.715
+[gpub007:0/64] 2023-07-10 17:59:52,886 (trainer:732) INFO: 33epoch:train:6501-6600batch: iter_time=1.180e-04, forward_time=0.146, loss_ctc=57.891, loss_att=45.251, acc=0.700, loss=49.043, backward_time=1.027, grad_norm=111.736, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.183, optim0_lr0=6.284e-05, train_time=2.711
+[gpub007:0/64] 2023-07-10 18:01:23,929 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub007:0/64] 2023-07-10 18:01:42,172 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-10 18:01:45,590 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-10 18:01:45,590 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpub007:0/64] 2023-07-10 18:01:45,597 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-10 18:06:35,698 (trainer:732) INFO: 33epoch:train:6601-6700batch: iter_time=1.223, forward_time=0.147, loss_ctc=71.258, loss_att=50.197, acc=0.710, loss=56.515, backward_time=1.044, grad_norm=111.125, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.283e-05, train_time=8.056
+[gpub007:0/64] 2023-07-10 18:08:52,452 (trainer:732) INFO: 33epoch:train:6701-6800batch: iter_time=1.332e-04, forward_time=0.148, loss_ctc=78.114, loss_att=54.449, acc=0.690, loss=61.548, backward_time=1.032, grad_norm=151.267, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.282e-05, train_time=2.735
+[gpub007:0/64] 2023-07-10 18:11:09,090 (trainer:732) INFO: 33epoch:train:6801-6900batch: iter_time=1.245e-04, forward_time=0.148, loss_ctc=67.217, loss_att=54.946, acc=0.697, loss=58.627, backward_time=1.031, grad_norm=124.407, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.281e-05, train_time=2.733
+[gpub007:0/64] 2023-07-10 18:13:25,167 (trainer:732) INFO: 33epoch:train:6901-7000batch: iter_time=1.224e-04, forward_time=0.147, loss_ctc=71.830, loss_att=53.603, acc=0.720, loss=59.071, backward_time=1.030, grad_norm=110.919, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.280e-05, train_time=2.721
+[gpub007:0/64] 2023-07-10 18:15:41,008 (trainer:732) INFO: 33epoch:train:7001-7100batch: iter_time=1.280e-04, forward_time=0.147, loss_ctc=63.613, loss_att=42.886, acc=0.726, loss=49.104, backward_time=1.028, grad_norm=90.472, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.279e-05, train_time=2.717
+[gpub007:0/64] 2023-07-10 18:17:57,104 (trainer:732) INFO: 33epoch:train:7101-7200batch: iter_time=1.218e-04, forward_time=0.148, loss_ctc=71.112, loss_att=53.614, acc=0.720, loss=58.863, backward_time=1.030, grad_norm=111.220, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.278e-05, train_time=2.722
+[gpub007:0/64] 2023-07-10 18:20:12,800 (trainer:732) INFO: 33epoch:train:7201-7300batch: iter_time=1.190e-04, forward_time=0.147, loss_ctc=73.507, loss_att=52.795, acc=0.710, loss=59.009, backward_time=1.027, grad_norm=108.732, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.277e-05, train_time=2.714
+[gpub007:0/64] 2023-07-10 18:22:28,504 (trainer:732) INFO: 33epoch:train:7301-7400batch: iter_time=1.163e-04, forward_time=0.148, loss_ctc=61.004, loss_att=45.528, acc=0.709, loss=50.171, backward_time=1.028, grad_norm=111.613, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.276e-05, train_time=2.714
+[gpub007:0/64] 2023-07-10 18:24:44,144 (trainer:732) INFO: 33epoch:train:7401-7500batch: iter_time=1.112e-04, forward_time=0.147, loss_ctc=66.357, loss_att=49.549, acc=0.698, loss=54.591, backward_time=1.027, grad_norm=99.908, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.275e-05, train_time=2.713
+[gpub007:0/64] 2023-07-10 18:24:45,506 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub007:0/64] 2023-07-10 18:25:03,650 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-10 18:25:07,132 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-10 18:25:07,132 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub007:0/64] 2023-07-10 18:25:07,138 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-10 18:30:49,786 (trainer:732) INFO: 33epoch:train:7501-7600batch: iter_time=1.256, forward_time=0.167, loss_ctc=79.656, loss_att=56.412, acc=0.699, loss=63.385, backward_time=1.051, grad_norm=132.609, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.183, optim0_lr0=6.274e-05, train_time=7.312
+[gpub007:0/64] 2023-07-10 18:33:07,622 (trainer:732) INFO: 33epoch:train:7601-7700batch: iter_time=1.151e-04, forward_time=0.146, loss_ctc=67.726, loss_att=53.320, acc=0.699, loss=57.642, backward_time=1.029, grad_norm=129.357, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.273e-05, train_time=2.757
+[gpub007:0/64] 2023-07-10 18:35:23,690 (trainer:732) INFO: 33epoch:train:7701-7800batch: iter_time=1.159e-04, forward_time=0.147, loss_ctc=65.772, loss_att=48.486, acc=0.726, loss=53.672, backward_time=1.029, grad_norm=136.827, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.272e-05, train_time=2.721
+[gpub007:0/64] 2023-07-10 18:37:39,934 (trainer:732) INFO: 33epoch:train:7801-7900batch: iter_time=1.207e-04, forward_time=0.148, loss_ctc=68.408, loss_att=47.382, acc=0.741, loss=53.689, backward_time=1.030, grad_norm=123.867, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.271e-05, train_time=2.725
+[gpub007:0/64] 2023-07-10 18:39:56,176 (trainer:732) INFO: 33epoch:train:7901-8000batch: iter_time=1.286e-04, forward_time=0.148, loss_ctc=73.520, loss_att=55.555, acc=0.724, loss=60.945, backward_time=1.031, grad_norm=106.924, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.270e-05, train_time=2.725
+[gpub007:0/64] 2023-07-10 18:42:11,859 (trainer:732) INFO: 33epoch:train:8001-8100batch: iter_time=1.205e-04, forward_time=0.146, loss_ctc=65.020, loss_att=48.260, acc=0.716, loss=53.288, backward_time=1.028, grad_norm=104.832, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.269e-05, train_time=2.713
+[gpub007:0/64] 2023-07-10 18:44:27,902 (trainer:732) INFO: 33epoch:train:8101-8200batch: iter_time=1.302e-04, forward_time=0.146, loss_ctc=69.898, loss_att=50.544, acc=0.716, loss=56.350, backward_time=1.028, grad_norm=107.278, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.268e-05, train_time=2.721
+[gpub007:0/64] 2023-07-10 18:46:43,665 (trainer:732) INFO: 33epoch:train:8201-8300batch: iter_time=1.186e-04, forward_time=0.146, loss_ctc=60.715, loss_att=46.249, acc=0.711, loss=50.589, backward_time=1.028, grad_norm=91.522, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.267e-05, train_time=2.715
+[gpub007:0/64] 2023-07-10 18:47:44,425 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub007:0/64] 2023-07-10 18:48:02,552 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-10 18:48:05,980 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-10 18:48:05,980 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub007:0/64] 2023-07-10 18:48:05,987 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-10 18:52:39,876 (trainer:732) INFO: 33epoch:train:8301-8400batch: iter_time=1.729, forward_time=0.184, loss_ctc=72.966, loss_att=53.592, acc=0.703, loss=59.404, backward_time=1.042, grad_norm=105.774, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.186, optim0_lr0=6.266e-05, train_time=7.123
+[gpub007:0/64] 2023-07-10 18:54:56,849 (trainer:732) INFO: 33epoch:train:8401-8500batch: iter_time=1.531e-04, forward_time=0.147, loss_ctc=75.500, loss_att=54.748, acc=0.691, loss=60.974, backward_time=1.030, grad_norm=123.647, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.265e-05, train_time=2.740
+[gpub007:0/64] 2023-07-10 18:57:13,532 (trainer:732) INFO: 33epoch:train:8501-8600batch: iter_time=1.360e-04, forward_time=0.147, loss_ctc=66.478, loss_att=54.282, acc=0.700, loss=57.941, backward_time=1.030, grad_norm=118.749, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.264e-05, train_time=2.733
+[gpub007:0/64] 2023-07-10 18:59:29,475 (trainer:732) INFO: 33epoch:train:8601-8700batch: iter_time=1.251e-04, forward_time=0.146, loss_ctc=71.725, loss_att=53.649, acc=0.719, loss=59.072, backward_time=1.028, grad_norm=197.736, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.263e-05, train_time=2.719
+[gpub007:0/64] 2023-07-10 19:01:45,336 (trainer:732) INFO: 33epoch:train:8701-8800batch: iter_time=1.342e-04, forward_time=0.147, loss_ctc=63.025, loss_att=43.130, acc=0.727, loss=49.098, backward_time=1.028, grad_norm=103.614, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.262e-05, train_time=2.717
+[gpub007:0/64] 2023-07-10 19:04:01,102 (trainer:732) INFO: 33epoch:train:8801-8900batch: iter_time=1.323e-04, forward_time=0.146, loss_ctc=71.275, loss_att=54.296, acc=0.716, loss=59.390, backward_time=1.027, grad_norm=114.658, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.261e-05, train_time=2.715
+[gpub007:0/64] 2023-07-10 19:06:21,567 (trainer:732) INFO: 33epoch:train:8901-9000batch: iter_time=1.338e-04, forward_time=0.145, loss_ctc=70.594, loss_att=51.243, acc=0.714, loss=57.048, backward_time=1.039, grad_norm=110.215, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.260e-05, train_time=2.809
+[gpub007:0/64] 2023-07-10 19:08:37,159 (trainer:732) INFO: 33epoch:train:9001-9100batch: iter_time=1.287e-04, forward_time=0.145, loss_ctc=60.715, loss_att=45.124, acc=0.711, loss=49.801, backward_time=1.027, grad_norm=89.618, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.259e-05, train_time=2.712
+[gpub007:0/64] 2023-07-10 19:10:14,028 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub007:0/64] 2023-07-10 19:10:32,057 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-10 19:10:35,839 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-10 19:10:35,839 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub007:0/64] 2023-07-10 19:10:35,845 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-10 19:13:48,794 (trainer:732) INFO: 33epoch:train:9101-9200batch: iter_time=1.681, forward_time=0.162, loss_ctc=70.682, loss_att=57.077, acc=0.693, loss=61.159, backward_time=1.040, grad_norm=102.706, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.258e-05, train_time=6.232
+[gpub007:0/64] 2023-07-10 19:16:04,921 (trainer:732) INFO: 33epoch:train:9201-9300batch: iter_time=1.177e-04, forward_time=0.144, loss_ctc=77.583, loss_att=53.562, acc=0.704, loss=60.768, backward_time=1.029, grad_norm=123.747, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.181, optim0_lr0=6.257e-05, train_time=2.722
+[gpub007:0/64] 2023-07-10 19:18:21,240 (trainer:732) INFO: 33epoch:train:9301-9400batch: iter_time=1.143e-04, forward_time=0.145, loss_ctc=67.520, loss_att=53.688, acc=0.701, loss=57.838, backward_time=1.028, grad_norm=107.774, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.181, optim0_lr0=6.256e-05, train_time=2.726
+[gpub007:0/64] 2023-07-10 19:20:37,860 (trainer:732) INFO: 33epoch:train:9401-9500batch: iter_time=1.226e-04, forward_time=0.147, loss_ctc=66.978, loss_att=49.402, acc=0.725, loss=54.675, backward_time=1.030, grad_norm=109.628, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.255e-05, train_time=2.732
+[gpub007:0/64] 2023-07-10 19:22:53,987 (trainer:732) INFO: 33epoch:train:9501-9600batch: iter_time=1.187e-04, forward_time=0.145, loss_ctc=68.305, loss_att=47.647, acc=0.738, loss=53.844, backward_time=1.027, grad_norm=109.822, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.254e-05, train_time=2.722
+[gpub007:0/64] 2023-07-10 19:25:10,253 (trainer:732) INFO: 33epoch:train:9601-9700batch: iter_time=1.137e-04, forward_time=0.146, loss_ctc=73.905, loss_att=55.968, acc=0.726, loss=61.349, backward_time=1.030, grad_norm=108.971, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.253e-05, train_time=2.725
+[gpub007:0/64] 2023-07-10 19:27:25,776 (trainer:732) INFO: 33epoch:train:9701-9800batch: iter_time=1.187e-04, forward_time=0.145, loss_ctc=63.140, loss_att=45.901, acc=0.717, loss=51.072, backward_time=1.026, grad_norm=104.687, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.252e-05, train_time=2.710
+[gpub007:0/64] 2023-07-10 19:29:41,827 (trainer:732) INFO: 33epoch:train:9801-9900batch: iter_time=1.271e-04, forward_time=0.147, loss_ctc=69.084, loss_att=49.034, acc=0.722, loss=55.049, backward_time=1.031, grad_norm=117.631, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.251e-05, train_time=2.721
+[gpub007:0/64] 2023-07-10 19:31:57,752 (trainer:732) INFO: 33epoch:train:9901-10000batch: iter_time=1.112e-04, forward_time=0.146, loss_ctc=62.702, loss_att=47.431, acc=0.711, loss=52.012, backward_time=1.030, grad_norm=129.784, clip=100.000, loss_scale=3.961e+28, optim_step_time=0.182, optim0_lr0=6.250e-05, train_time=2.718
+[gpub007:0/64] 2023-07-10 19:47:05,111 (trainer:338) INFO: 33epoch results: [train] iter_time=0.193, forward_time=0.148, loss_ctc=70.146, loss_att=51.879, acc=0.709, loss=57.359, backward_time=1.031, grad_norm=114.398, clip=100.000, loss_scale=2.575e+28, optim_step_time=0.182, optim0_lr0=6.300e-05, train_time=3.329, time=4 hours, 37 minutes and 49.73 seconds, total_count=300000, gpu_max_cached_mem_GB=37.219, [valid] loss_ctc=48.935, cer_ctc=0.271, loss_att=47.287, acc=0.649, cer=0.446, wer=1.000, loss=47.781, time=8 minutes and 37.42 seconds, total_count=30866, gpu_max_cached_mem_GB=37.219, [att_plot] time=6 minutes and 7.12 seconds, total_count=0, gpu_max_cached_mem_GB=37.219
+[gpub007:0/64] 2023-07-10 19:47:20,758 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub007:0/64] 2023-07-10 19:47:20,766 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/28epoch.pth
+[gpub007:0/64] 2023-07-10 19:47:20,766 (trainer:272) INFO: 34/50epoch started. Estimated time to finish: 3 days, 10 hours and 22 minutes
+[gpub007:0/64] 2023-07-10 19:47:20,770 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub007:0/64] 2023-07-10 19:47:39,573 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 19:47:43,243 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 19:47:43,243 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub007:0/64] 2023-07-10 19:47:43,274 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 19:59:42,068 (trainer:732) INFO: 34epoch:train:1-100batch: iter_time=5.956, forward_time=0.201, loss_ctc=69.349, loss_att=48.012, acc=0.699, loss=54.413, backward_time=1.042, grad_norm=102.808, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.185, optim0_lr0=6.249e-05, train_time=14.826 +[gpub007:0/64] 2023-07-10 20:02:00,522 (trainer:732) INFO: 34epoch:train:101-200batch: iter_time=1.222e-04, forward_time=0.145, loss_ctc=71.446, loss_att=53.919, acc=0.703, loss=59.177, backward_time=1.028, grad_norm=114.778, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.249e-05, train_time=2.769 +[gpub007:0/64] 2023-07-10 20:04:17,015 (trainer:732) INFO: 34epoch:train:201-300batch: iter_time=1.355e-04, forward_time=0.145, loss_ctc=61.524, loss_att=51.532, acc=0.691, loss=54.529, backward_time=1.025, grad_norm=102.625, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.183, optim0_lr0=6.248e-05, train_time=2.730 +[gpub007:0/64] 2023-07-10 20:06:33,815 (trainer:732) INFO: 34epoch:train:301-400batch: iter_time=1.484e-04, forward_time=0.145, loss_ctc=66.318, loss_att=50.030, acc=0.695, loss=54.917, backward_time=1.025, grad_norm=111.002, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.247e-05, train_time=2.736 +[gpub007:0/64] 2023-07-10 20:08:49,770 (trainer:732) INFO: 34epoch:train:401-500batch: iter_time=1.378e-04, forward_time=0.146, loss_ctc=81.394, loss_att=64.657, acc=0.684, loss=69.678, backward_time=1.028, grad_norm=119.604, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.246e-05, train_time=2.719 +[gpub007:0/64] 2023-07-10 20:11:08,435 (trainer:732) INFO: 34epoch:train:501-600batch: iter_time=1.356e-04, forward_time=0.145, loss_ctc=80.323, loss_att=56.481, acc=0.697, loss=63.633, backward_time=1.028, grad_norm=109.089, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.245e-05, train_time=2.773 +[gpub007:0/64] 2023-07-10 20:13:24,190 (trainer:732) INFO: 34epoch:train:601-700batch: iter_time=1.325e-04, forward_time=0.146, loss_ctc=68.599, loss_att=53.053, acc=0.711, loss=57.717, backward_time=1.029, grad_norm=143.225, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.183, optim0_lr0=6.244e-05, train_time=2.715 +[gpub007:0/64] 2023-07-10 20:15:40,219 (trainer:732) INFO: 34epoch:train:701-800batch: iter_time=1.249e-04, forward_time=0.145, loss_ctc=79.178, loss_att=58.364, acc=0.700, loss=64.608, backward_time=1.027, grad_norm=111.141, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.183, optim0_lr0=6.243e-05, 
train_time=2.720 +[gpub007:0/64] 2023-07-10 20:16:34,521 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub007:0/64] 2023-07-10 20:16:52,203 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 20:16:55,576 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 20:16:55,576 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub007:0/64] 2023-07-10 20:16:55,582 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 20:21:03,836 (trainer:732) INFO: 34epoch:train:801-900batch: iter_time=1.287, forward_time=0.147, loss_ctc=70.884, loss_att=52.866, acc=0.696, loss=58.272, backward_time=1.046, grad_norm=119.164, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.242e-05, train_time=6.472 +[gpub007:0/64] 2023-07-10 20:23:20,205 (trainer:732) INFO: 34epoch:train:901-1000batch: iter_time=1.497e-04, forward_time=0.147, loss_ctc=71.782, loss_att=53.483, acc=0.707, loss=58.973, backward_time=1.030, grad_norm=105.750, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.241e-05, train_time=2.727 +[gpub007:0/64] 2023-07-10 20:25:35,958 (trainer:732) INFO: 34epoch:train:1001-1100batch: iter_time=1.359e-04, forward_time=0.146, loss_ctc=61.281, loss_att=49.246, acc=0.710, loss=52.857, backward_time=1.027, grad_norm=103.242, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.240e-05, train_time=2.715 +[gpub007:0/64] 2023-07-10 20:27:52,052 (trainer:732) INFO: 34epoch:train:1101-1200batch: iter_time=1.330e-04, forward_time=0.145, loss_ctc=65.369, loss_att=47.573, acc=0.708, loss=52.912, backward_time=1.027, grad_norm=105.568, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.239e-05, train_time=2.722 +[gpub007:0/64] 2023-07-10 20:30:08,295 (trainer:732) INFO: 34epoch:train:1201-1300batch: iter_time=1.208e-04, forward_time=0.145, loss_ctc=80.498, loss_att=63.017, acc=0.694, loss=68.261, backward_time=1.031, grad_norm=133.697, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.183, optim0_lr0=6.238e-05, train_time=2.725 +[gpub007:0/64] 2023-07-10 20:32:24,060 (trainer:732) INFO: 34epoch:train:1301-1400batch: iter_time=1.104e-04, forward_time=0.145, loss_ctc=77.530, loss_att=56.703, acc=0.706, loss=62.951, backward_time=1.029, grad_norm=124.538, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.183, optim0_lr0=6.237e-05, train_time=2.715 +[gpub007:0/64] 2023-07-10 20:34:39,939 (trainer:732) INFO: 34epoch:train:1401-1500batch: iter_time=1.059e-04, forward_time=0.146, loss_ctc=68.272, loss_att=51.475, acc=0.721, loss=56.514, backward_time=1.029, grad_norm=114.620, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.183, optim0_lr0=6.236e-05, train_time=2.717 +[gpub007:0/64] 2023-07-10 20:36:55,655 (trainer:732) INFO: 34epoch:train:1501-1600batch: iter_time=1.094e-04, forward_time=0.144, loss_ctc=78.170, loss_att=58.606, acc=0.708, 
loss=64.475, backward_time=1.028, grad_norm=125.969, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.235e-05, train_time=2.714 +[gpub007:0/64] 2023-07-10 20:38:27,808 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub007:0/64] 2023-07-10 20:38:45,905 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 20:38:49,379 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 20:38:49,379 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub007:0/64] 2023-07-10 20:38:49,385 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 20:41:45,529 (trainer:732) INFO: 34epoch:train:1601-1700batch: iter_time=1.215, forward_time=0.145, loss_ctc=65.763, loss_att=49.462, acc=0.716, loss=54.353, backward_time=1.041, grad_norm=114.498, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.234e-05, train_time=5.797 +[gpub007:0/64] 2023-07-10 20:44:01,518 (trainer:732) INFO: 34epoch:train:1701-1800batch: iter_time=1.185e-04, forward_time=0.146, loss_ctc=67.985, loss_att=51.996, acc=0.693, loss=56.793, backward_time=1.029, grad_norm=123.291, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.233e-05, train_time=2.720 +[gpub007:0/64] 2023-07-10 20:46:17,392 (trainer:732) INFO: 34epoch:train:1801-1900batch: iter_time=1.154e-04, forward_time=0.146, loss_ctc=70.309, loss_att=53.677, acc=0.702, loss=58.666, backward_time=1.029, grad_norm=107.144, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.232e-05, train_time=2.717 +[gpub007:0/64] 2023-07-10 20:48:32,821 (trainer:732) INFO: 34epoch:train:1901-2000batch: iter_time=1.295e-04, forward_time=0.146, loss_ctc=60.184, loss_att=47.079, acc=0.705, loss=51.011, backward_time=1.025, grad_norm=109.060, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.231e-05, train_time=2.708 +[gpub007:0/64] 2023-07-10 20:50:48,321 (trainer:732) INFO: 34epoch:train:2001-2100batch: iter_time=1.190e-04, forward_time=0.146, loss_ctc=72.203, loss_att=50.001, acc=0.703, loss=56.661, backward_time=1.027, grad_norm=105.082, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.230e-05, train_time=2.710 +[gpub007:0/64] 2023-07-10 20:53:04,041 (trainer:732) INFO: 34epoch:train:2101-2200batch: iter_time=1.280e-04, forward_time=0.145, loss_ctc=72.034, loss_att=57.310, acc=0.694, loss=61.727, backward_time=1.028, grad_norm=129.261, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.229e-05, train_time=2.714 +[gpub007:0/64] 2023-07-10 20:55:19,665 (trainer:732) INFO: 34epoch:train:2201-2300batch: iter_time=1.237e-04, forward_time=0.145, loss_ctc=79.750, loss_att=58.507, acc=0.697, loss=64.880, backward_time=1.027, grad_norm=112.420, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.183, optim0_lr0=6.228e-05, train_time=2.712 +[gpub007:0/64] 2023-07-10 20:57:35,180 
(trainer:732) INFO: 34epoch:train:2301-2400batch: iter_time=1.325e-04, forward_time=0.145, loss_ctc=66.119, loss_att=49.079, acc=0.718, loss=54.191, backward_time=1.028, grad_norm=99.625, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.227e-05, train_time=2.710 +[gpub007:0/64] 2023-07-10 20:59:50,806 (trainer:732) INFO: 34epoch:train:2401-2500batch: iter_time=1.231e-04, forward_time=0.145, loss_ctc=78.588, loss_att=61.308, acc=0.703, loss=66.492, backward_time=1.026, grad_norm=110.532, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.226e-05, train_time=2.712 +[gpub007:0/64] 2023-07-10 20:59:52,151 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub007:0/64] 2023-07-10 21:00:10,154 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 21:00:13,596 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 21:00:13,596 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub007:0/64] 2023-07-10 21:00:13,602 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 21:06:20,656 (trainer:732) INFO: 34epoch:train:2501-2600batch: iter_time=1.226, forward_time=0.205, loss_ctc=67.687, loss_att=47.252, acc=0.712, loss=53.382, backward_time=1.046, grad_norm=104.319, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.185, optim0_lr0=6.225e-05, train_time=7.797 +[gpub007:0/64] 2023-07-10 21:08:37,221 (trainer:732) INFO: 34epoch:train:2601-2700batch: iter_time=1.231e-04, forward_time=0.146, loss_ctc=71.634, loss_att=53.817, acc=0.710, loss=59.162, backward_time=1.031, grad_norm=126.412, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.224e-05, train_time=2.731 +[gpub007:0/64] 2023-07-10 21:10:53,003 (trainer:732) INFO: 34epoch:train:2701-2800batch: iter_time=1.325e-04, forward_time=0.146, loss_ctc=60.465, loss_att=47.102, acc=0.719, loss=51.111, backward_time=1.028, grad_norm=96.180, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.223e-05, train_time=2.715 +[gpub007:0/64] 2023-07-10 21:13:08,707 (trainer:732) INFO: 34epoch:train:2801-2900batch: iter_time=1.283e-04, forward_time=0.146, loss_ctc=65.345, loss_att=47.927, acc=0.710, loss=53.152, backward_time=1.027, grad_norm=131.391, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.222e-05, train_time=2.714 +[gpub007:0/64] 2023-07-10 21:15:24,669 (trainer:732) INFO: 34epoch:train:2901-3000batch: iter_time=1.231e-04, forward_time=0.144, loss_ctc=78.791, loss_att=61.741, acc=0.699, loss=66.856, backward_time=1.029, grad_norm=121.681, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.181, optim0_lr0=6.221e-05, train_time=2.719 +[gpub007:0/64] 2023-07-10 21:17:40,573 (trainer:732) INFO: 34epoch:train:3001-3100batch: iter_time=1.144e-04, forward_time=0.145, loss_ctc=75.539, loss_att=55.371, acc=0.713, loss=61.422, backward_time=1.028, grad_norm=128.618, clip=100.000, 
loss_scale=7.923e+28, optim_step_time=0.181, optim0_lr0=6.220e-05, train_time=2.718 +[gpub007:0/64] 2023-07-10 21:19:56,364 (trainer:732) INFO: 34epoch:train:3101-3200batch: iter_time=1.306e-04, forward_time=0.144, loss_ctc=68.736, loss_att=51.547, acc=0.723, loss=56.704, backward_time=1.028, grad_norm=107.194, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.181, optim0_lr0=6.219e-05, train_time=2.716 +[gpub007:0/64] 2023-07-10 21:22:12,480 (trainer:732) INFO: 34epoch:train:3201-3300batch: iter_time=1.340e-04, forward_time=0.146, loss_ctc=78.672, loss_att=57.227, acc=0.714, loss=63.660, backward_time=1.030, grad_norm=115.871, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.218e-05, train_time=2.722 +[gpub007:0/64] 2023-07-10 21:23:09,921 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub007:0/64] 2023-07-10 21:23:28,147 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 21:23:31,586 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 21:23:31,586 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub007:0/64] 2023-07-10 21:23:31,601 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 21:29:15,023 (trainer:732) INFO: 34epoch:train:3301-3400batch: iter_time=2.754, forward_time=0.146, loss_ctc=67.965, loss_att=49.802, acc=0.713, loss=55.251, backward_time=1.043, grad_norm=95.507, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.218e-05, train_time=8.451 +[gpub007:0/64] 2023-07-10 21:31:34,354 (trainer:732) INFO: 34epoch:train:3401-3500batch: iter_time=1.313e-04, forward_time=0.146, loss_ctc=65.794, loss_att=48.214, acc=0.719, loss=53.488, backward_time=1.032, grad_norm=100.118, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.217e-05, train_time=2.786 +[gpub007:0/64] 2023-07-10 21:33:49,883 (trainer:732) INFO: 34epoch:train:3501-3600batch: iter_time=1.592e-04, forward_time=0.146, loss_ctc=64.599, loss_att=50.354, acc=0.701, loss=54.628, backward_time=1.025, grad_norm=99.074, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.216e-05, train_time=2.710 +[gpub007:0/64] 2023-07-10 21:36:05,415 (trainer:732) INFO: 34epoch:train:3601-3700batch: iter_time=1.473e-04, forward_time=0.147, loss_ctc=66.878, loss_att=49.527, acc=0.705, loss=54.733, backward_time=1.026, grad_norm=116.845, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.215e-05, train_time=2.710 +[gpub007:0/64] 2023-07-10 21:38:21,051 (trainer:732) INFO: 34epoch:train:3701-3800batch: iter_time=1.548e-04, forward_time=0.146, loss_ctc=71.792, loss_att=56.733, acc=0.691, loss=61.251, backward_time=1.027, grad_norm=114.711, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.214e-05, train_time=2.712 +[gpub007:0/64] 2023-07-10 21:40:36,738 (trainer:732) INFO: 34epoch:train:3801-3900batch: iter_time=1.420e-04, 
forward_time=0.146, loss_ctc=80.471, loss_att=58.192, acc=0.699, loss=64.876, backward_time=1.027, grad_norm=127.665, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.213e-05, train_time=2.714 +[gpub007:0/64] 2023-07-10 21:42:52,174 (trainer:732) INFO: 34epoch:train:3901-4000batch: iter_time=1.440e-04, forward_time=0.146, loss_ctc=68.267, loss_att=50.941, acc=0.715, loss=56.139, backward_time=1.025, grad_norm=97.636, clip=100.000, loss_scale=7.923e+28, optim_step_time=0.182, optim0_lr0=6.212e-05, train_time=2.709 +[gpub007:0/64] 2023-07-10 21:45:07,809 (trainer:732) INFO: 34epoch:train:4001-4100batch: iter_time=1.322e-04, forward_time=0.146, loss_ctc=76.407, loss_att=54.587, acc=0.714, loss=61.133, backward_time=1.028, grad_norm=117.066, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.211e-05, train_time=2.712 +[gpub007:0/64] 2023-07-10 21:46:38,902 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub007:0/64] 2023-07-10 21:46:56,852 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 21:47:00,307 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 21:47:00,307 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub007:0/64] 2023-07-10 21:47:00,313 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 21:50:44,681 (trainer:732) INFO: 34epoch:train:4101-4200batch: iter_time=1.211, forward_time=0.147, loss_ctc=67.382, loss_att=49.627, acc=0.728, loss=54.953, backward_time=1.041, grad_norm=101.570, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.210e-05, train_time=6.737 +[gpub007:0/64] 2023-07-10 21:53:01,360 (trainer:732) INFO: 34epoch:train:4201-4300batch: iter_time=1.238e-04, forward_time=0.146, loss_ctc=68.126, loss_att=53.262, acc=0.705, loss=57.721, backward_time=1.030, grad_norm=121.194, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.209e-05, train_time=2.733 +[gpub007:0/64] 2023-07-10 21:55:17,215 (trainer:732) INFO: 34epoch:train:4301-4400batch: iter_time=1.235e-04, forward_time=0.146, loss_ctc=68.569, loss_att=50.240, acc=0.715, loss=55.739, backward_time=1.029, grad_norm=104.723, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.181, optim0_lr0=6.208e-05, train_time=2.717 +[gpub007:0/64] 2023-07-10 21:57:33,187 (trainer:732) INFO: 34epoch:train:4401-4500batch: iter_time=1.297e-04, forward_time=0.146, loss_ctc=60.201, loss_att=45.707, acc=0.722, loss=50.055, backward_time=1.029, grad_norm=116.262, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.207e-05, train_time=2.719 +[gpub007:0/64] 2023-07-10 21:59:48,907 (trainer:732) INFO: 34epoch:train:4501-4600batch: iter_time=1.199e-04, forward_time=0.145, loss_ctc=72.214, loss_att=51.469, acc=0.706, loss=57.693, backward_time=1.028, grad_norm=117.058, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.206e-05, 
train_time=2.714 +[gpub007:0/64] 2023-07-10 22:02:05,114 (trainer:732) INFO: 34epoch:train:4601-4700batch: iter_time=1.239e-04, forward_time=0.145, loss_ctc=72.669, loss_att=56.498, acc=0.707, loss=61.349, backward_time=1.030, grad_norm=122.328, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.181, optim0_lr0=6.205e-05, train_time=2.724 +[gpub007:0/64] 2023-07-10 22:04:21,389 (trainer:732) INFO: 34epoch:train:4701-4800batch: iter_time=1.226e-04, forward_time=0.146, loss_ctc=78.784, loss_att=57.694, acc=0.718, loss=64.021, backward_time=1.032, grad_norm=121.739, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.181, optim0_lr0=6.204e-05, train_time=2.725 +[gpub007:0/64] 2023-07-10 22:06:37,178 (trainer:732) INFO: 34epoch:train:4801-4900batch: iter_time=1.255e-04, forward_time=0.146, loss_ctc=65.076, loss_att=49.119, acc=0.721, loss=53.906, backward_time=1.029, grad_norm=104.694, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.203e-05, train_time=2.716 +[gpub007:0/64] 2023-07-10 22:08:53,142 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub007:0/64] 2023-07-10 22:09:10,979 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 22:09:14,481 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 22:09:14,482 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub007:0/64] 2023-07-10 22:09:14,488 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 22:13:11,552 (trainer:732) INFO: 34epoch:train:4901-5000batch: iter_time=1.224, forward_time=0.146, loss_ctc=77.627, loss_att=59.803, acc=0.717, loss=65.150, backward_time=1.036, grad_norm=109.199, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.202e-05, train_time=7.887 +[gpub007:0/64] 2023-07-10 22:15:29,578 (trainer:732) INFO: 34epoch:train:5001-5100batch: iter_time=1.154e-04, forward_time=0.147, loss_ctc=67.150, loss_att=46.137, acc=0.723, loss=52.441, backward_time=1.038, grad_norm=110.471, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.201e-05, train_time=2.760 +[gpub007:0/64] 2023-07-10 22:17:46,037 (trainer:732) INFO: 34epoch:train:5101-5200batch: iter_time=1.256e-04, forward_time=0.145, loss_ctc=70.295, loss_att=53.173, acc=0.716, loss=58.309, backward_time=1.028, grad_norm=114.633, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.200e-05, train_time=2.729 +[gpub007:0/64] 2023-07-10 22:20:01,627 (trainer:732) INFO: 34epoch:train:5201-5300batch: iter_time=1.311e-04, forward_time=0.145, loss_ctc=59.916, loss_att=46.780, acc=0.720, loss=50.721, backward_time=1.027, grad_norm=106.684, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.199e-05, train_time=2.712 +[gpub007:0/64] 2023-07-10 22:22:17,708 (trainer:732) INFO: 34epoch:train:5301-5400batch: iter_time=1.188e-04, forward_time=0.147, loss_ctc=65.392, loss_att=47.487, acc=0.715, 
loss=52.858, backward_time=1.029, grad_norm=84.710, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.198e-05, train_time=2.721 +[gpub007:0/64] 2023-07-10 22:24:37,033 (trainer:732) INFO: 34epoch:train:5401-5500batch: iter_time=1.233e-04, forward_time=0.145, loss_ctc=77.678, loss_att=61.020, acc=0.701, loss=66.017, backward_time=1.032, grad_norm=122.241, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.197e-05, train_time=2.786 +[gpub007:0/64] 2023-07-10 22:26:59,290 (trainer:732) INFO: 34epoch:train:5501-5600batch: iter_time=1.178e-04, forward_time=0.146, loss_ctc=74.614, loss_att=53.719, acc=0.718, loss=59.988, backward_time=1.049, grad_norm=118.817, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.196e-05, train_time=2.845 +[gpub007:0/64] 2023-07-10 22:29:29,744 (trainer:732) INFO: 34epoch:train:5601-5700batch: iter_time=1.290e-04, forward_time=0.146, loss_ctc=68.752, loss_att=52.264, acc=0.727, loss=57.210, backward_time=1.043, grad_norm=87.526, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.196e-05, train_time=3.009 +[gpub007:0/64] 2023-07-10 22:31:45,668 (trainer:732) INFO: 34epoch:train:5701-5800batch: iter_time=1.150e-04, forward_time=0.146, loss_ctc=75.418, loss_att=56.265, acc=0.719, loss=62.011, backward_time=1.030, grad_norm=141.473, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.195e-05, train_time=2.718 +[gpub007:0/64] 2023-07-10 22:32:35,295 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub007:0/64] 2023-07-10 22:32:52,948 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 22:32:56,273 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 22:32:56,273 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub007:0/64] 2023-07-10 22:32:56,288 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 22:38:22,074 (trainer:732) INFO: 34epoch:train:5801-5900batch: iter_time=1.241, forward_time=0.145, loss_ctc=68.901, loss_att=49.472, acc=0.721, loss=55.301, backward_time=1.045, grad_norm=106.864, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.194e-05, train_time=7.928 +[gpub007:0/64] 2023-07-10 22:40:52,202 (trainer:732) INFO: 34epoch:train:5901-6000batch: iter_time=1.214e-04, forward_time=0.154, loss_ctc=65.372, loss_att=48.613, acc=0.722, loss=53.640, backward_time=1.053, grad_norm=102.720, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.193e-05, train_time=3.002 +[gpub007:0/64] 2023-07-10 22:43:19,810 (trainer:732) INFO: 34epoch:train:6001-6100batch: iter_time=1.165e-04, forward_time=0.144, loss_ctc=64.195, loss_att=49.224, acc=0.715, loss=53.716, backward_time=1.038, grad_norm=95.561, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.181, optim0_lr0=6.192e-05, train_time=2.952 +[gpub007:0/64] 2023-07-10 22:45:35,490 (trainer:732) 
INFO: 34epoch:train:6101-6200batch: iter_time=1.189e-04, forward_time=0.145, loss_ctc=66.535, loss_att=49.513, acc=0.718, loss=54.620, backward_time=1.026, grad_norm=100.886, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.181, optim0_lr0=6.191e-05, train_time=2.713 +[gpub007:0/64] 2023-07-10 22:48:03,899 (trainer:732) INFO: 34epoch:train:6201-6300batch: iter_time=1.178e-04, forward_time=0.146, loss_ctc=71.073, loss_att=55.888, acc=0.700, loss=60.443, backward_time=1.055, grad_norm=104.337, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.181, optim0_lr0=6.190e-05, train_time=2.968 +[gpub007:0/64] 2023-07-10 22:50:26,174 (trainer:732) INFO: 34epoch:train:6301-6400batch: iter_time=1.255e-04, forward_time=0.146, loss_ctc=76.179, loss_att=56.767, acc=0.712, loss=62.590, backward_time=1.036, grad_norm=121.764, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.181, optim0_lr0=6.189e-05, train_time=2.845 +[gpub007:0/64] 2023-07-10 22:52:42,073 (trainer:732) INFO: 34epoch:train:6401-6500batch: iter_time=1.214e-04, forward_time=0.145, loss_ctc=69.003, loss_att=51.965, acc=0.727, loss=57.076, backward_time=1.028, grad_norm=126.176, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.181, optim0_lr0=6.188e-05, train_time=2.718 +[gpub007:0/64] 2023-07-10 22:54:58,126 (trainer:732) INFO: 34epoch:train:6501-6600batch: iter_time=1.221e-04, forward_time=0.146, loss_ctc=75.306, loss_att=55.111, acc=0.719, loss=61.170, backward_time=1.030, grad_norm=107.882, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.181, optim0_lr0=6.187e-05, train_time=2.721 +[gpub007:0/64] 2023-07-10 22:56:30,823 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub007:0/64] 2023-07-10 22:56:48,984 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 22:56:52,426 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 22:56:52,427 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub007:0/64] 2023-07-10 22:56:52,433 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 23:01:24,170 (trainer:732) INFO: 34epoch:train:6601-6700batch: iter_time=1.295, forward_time=0.191, loss_ctc=67.514, loss_att=50.071, acc=0.726, loss=55.304, backward_time=1.041, grad_norm=91.689, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.185, optim0_lr0=6.186e-05, train_time=7.720 +[gpub007:0/64] 2023-07-10 23:03:41,084 (trainer:732) INFO: 34epoch:train:6701-6800batch: iter_time=1.269e-04, forward_time=0.146, loss_ctc=67.060, loss_att=53.087, acc=0.701, loss=57.279, backward_time=1.027, grad_norm=122.027, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.185e-05, train_time=2.738 +[gpub007:0/64] 2023-07-10 23:05:56,990 (trainer:732) INFO: 34epoch:train:6801-6900batch: iter_time=1.420e-04, forward_time=0.146, loss_ctc=68.406, loss_att=50.635, acc=0.706, loss=55.966, backward_time=1.030, grad_norm=95.825, clip=100.000, 
loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.184e-05, train_time=2.718 +[gpub007:0/64] 2023-07-10 23:08:12,338 (trainer:732) INFO: 34epoch:train:6901-7000batch: iter_time=1.263e-04, forward_time=0.145, loss_ctc=62.076, loss_att=47.901, acc=0.708, loss=52.153, backward_time=1.025, grad_norm=110.676, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.183e-05, train_time=2.707 +[gpub007:0/64] 2023-07-10 23:10:27,756 (trainer:732) INFO: 34epoch:train:7001-7100batch: iter_time=1.320e-04, forward_time=0.145, loss_ctc=69.502, loss_att=48.780, acc=0.709, loss=54.996, backward_time=1.026, grad_norm=108.550, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.182e-05, train_time=2.708 +[gpub007:0/64] 2023-07-10 23:12:43,609 (trainer:732) INFO: 34epoch:train:7101-7200batch: iter_time=1.284e-04, forward_time=0.145, loss_ctc=70.722, loss_att=55.840, acc=0.701, loss=60.305, backward_time=1.028, grad_norm=108.433, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.181e-05, train_time=2.717 +[gpub007:0/64] 2023-07-10 23:14:59,137 (trainer:732) INFO: 34epoch:train:7201-7300batch: iter_time=1.316e-04, forward_time=0.144, loss_ctc=79.212, loss_att=59.552, acc=0.697, loss=65.450, backward_time=1.027, grad_norm=140.278, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.180e-05, train_time=2.710 +[gpub007:0/64] 2023-07-10 23:17:14,942 (trainer:732) INFO: 34epoch:train:7301-7400batch: iter_time=1.309e-04, forward_time=0.145, loss_ctc=65.601, loss_att=48.519, acc=0.722, loss=53.643, backward_time=1.027, grad_norm=105.796, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.179e-05, train_time=2.716 +[gpub007:0/64] 2023-07-10 23:19:42,933 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub007:0/64] 2023-07-10 23:20:00,973 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 23:20:04,452 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 23:20:04,452 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub007:0/64] 2023-07-10 23:20:04,468 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 23:24:16,749 (trainer:732) INFO: 34epoch:train:7401-7500batch: iter_time=2.721, forward_time=0.149, loss_ctc=77.532, loss_att=59.688, acc=0.706, loss=65.041, backward_time=1.035, grad_norm=123.327, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.178e-05, train_time=8.436 +[gpub007:0/64] 2023-07-10 23:26:35,748 (trainer:732) INFO: 34epoch:train:7501-7600batch: iter_time=1.207e-04, forward_time=0.146, loss_ctc=68.347, loss_att=53.228, acc=0.703, loss=57.763, backward_time=1.036, grad_norm=111.743, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.178e-05, train_time=2.780 +[gpub007:0/64] 2023-07-10 23:28:51,816 (trainer:732) INFO: 34epoch:train:7601-7700batch: iter_time=1.147e-04, forward_time=0.146, loss_ctc=69.126, loss_att=49.852, acc=0.716, loss=55.635, backward_time=1.029, grad_norm=106.716, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.177e-05, train_time=2.721 +[gpub007:0/64] 2023-07-10 23:31:07,805 (trainer:732) INFO: 34epoch:train:7701-7800batch: iter_time=1.161e-04, forward_time=0.147, loss_ctc=60.326, loss_att=46.853, acc=0.717, loss=50.895, backward_time=1.028, grad_norm=122.260, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.176e-05, train_time=2.720 +[gpub007:0/64] 2023-07-10 23:33:23,743 (trainer:732) INFO: 34epoch:train:7801-7900batch: iter_time=1.140e-04, forward_time=0.147, loss_ctc=68.286, loss_att=48.469, acc=0.715, loss=54.414, backward_time=1.029, grad_norm=109.396, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.175e-05, train_time=2.719 +[gpub007:0/64] 2023-07-10 23:35:39,580 (trainer:732) INFO: 34epoch:train:7901-8000batch: iter_time=1.131e-04, forward_time=0.145, loss_ctc=72.779, loss_att=56.400, acc=0.703, loss=61.313, backward_time=1.028, grad_norm=118.306, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.174e-05, train_time=2.717 +[gpub007:0/64] 2023-07-10 23:37:55,594 (trainer:732) INFO: 34epoch:train:8001-8100batch: iter_time=1.075e-04, forward_time=0.147, loss_ctc=76.493, loss_att=57.884, acc=0.713, loss=63.467, backward_time=1.029, grad_norm=116.105, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.173e-05, train_time=2.720 +[gpub007:0/64] 2023-07-10 23:40:11,471 (trainer:732) INFO: 34epoch:train:8101-8200batch: iter_time=1.160e-04, forward_time=0.147, loss_ctc=64.637, loss_att=48.240, acc=0.724, loss=53.159, backward_time=1.029, grad_norm=104.182, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, 
optim0_lr0=6.172e-05, train_time=2.717 +[gpub007:0/64] 2023-07-10 23:42:27,255 (trainer:732) INFO: 34epoch:train:8201-8300batch: iter_time=1.230e-04, forward_time=0.147, loss_ctc=77.723, loss_att=60.042, acc=0.712, loss=65.346, backward_time=1.029, grad_norm=111.442, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.183, optim0_lr0=6.171e-05, train_time=2.715 +[gpub007:0/64] 2023-07-10 23:43:16,673 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub007:0/64] 2023-07-10 23:43:34,915 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-10 23:43:38,592 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-10 23:43:38,592 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub007:0/64] 2023-07-10 23:43:38,599 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-10 23:48:42,803 (trainer:732) INFO: 34epoch:train:8301-8400batch: iter_time=1.216, forward_time=0.146, loss_ctc=65.185, loss_att=48.408, acc=0.703, loss=53.441, backward_time=1.051, grad_norm=105.433, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.170e-05, train_time=7.511 +[gpub007:0/64] 2023-07-10 23:51:00,088 (trainer:732) INFO: 34epoch:train:8401-8500batch: iter_time=1.037e-04, forward_time=0.145, loss_ctc=69.581, loss_att=51.875, acc=0.714, loss=57.186, backward_time=1.030, grad_norm=104.887, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.169e-05, train_time=2.745 +[gpub007:0/64] 2023-07-10 23:53:16,048 (trainer:732) INFO: 34epoch:train:8501-8600batch: iter_time=9.486e-05, forward_time=0.144, loss_ctc=58.548, loss_att=46.792, acc=0.708, loss=50.319, backward_time=1.028, grad_norm=92.087, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.168e-05, train_time=2.719 +[gpub007:0/64] 2023-07-10 23:55:31,942 (trainer:732) INFO: 34epoch:train:8601-8700batch: iter_time=9.773e-05, forward_time=0.146, loss_ctc=65.359, loss_att=47.129, acc=0.709, loss=52.598, backward_time=1.028, grad_norm=112.394, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.167e-05, train_time=2.718 +[gpub007:0/64] 2023-07-10 23:57:47,828 (trainer:732) INFO: 34epoch:train:8701-8800batch: iter_time=9.795e-05, forward_time=0.145, loss_ctc=76.906, loss_att=60.671, acc=0.693, loss=65.541, backward_time=1.028, grad_norm=115.226, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.166e-05, train_time=2.718 +[gpub007:0/64] 2023-07-11 00:00:03,516 (trainer:732) INFO: 34epoch:train:8801-8900batch: iter_time=1.040e-04, forward_time=0.145, loss_ctc=74.389, loss_att=54.275, acc=0.704, loss=60.309, backward_time=1.027, grad_norm=99.943, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.165e-05, train_time=2.714 +[gpub007:0/64] 2023-07-11 00:02:19,258 (trainer:732) INFO: 34epoch:train:8901-9000batch: iter_time=1.006e-04, forward_time=0.145, loss_ctc=67.789, 
loss_att=51.242, acc=0.721, loss=56.206, backward_time=1.028, grad_norm=99.008, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.164e-05, train_time=2.715 +[gpub007:0/64] 2023-07-11 00:04:35,099 (trainer:732) INFO: 34epoch:train:9001-9100batch: iter_time=1.035e-04, forward_time=0.145, loss_ctc=74.780, loss_att=54.151, acc=0.714, loss=60.339, backward_time=1.028, grad_norm=105.329, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.163e-05, train_time=2.717 +[gpub007:0/64] 2023-07-11 00:06:05,782 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub007:0/64] 2023-07-11 00:06:24,337 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 00:06:27,764 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 00:06:27,764 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub007:0/64] 2023-07-11 00:06:27,770 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 00:09:52,692 (trainer:732) INFO: 34epoch:train:9101-9200batch: iter_time=1.233, forward_time=0.145, loss_ctc=67.539, loss_att=52.675, acc=0.694, loss=57.134, backward_time=1.037, grad_norm=101.310, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.162e-05, train_time=6.352 +[gpub007:0/64] 2023-07-11 00:12:09,529 (trainer:732) INFO: 34epoch:train:9201-9300batch: iter_time=1.209e-04, forward_time=0.145, loss_ctc=65.178, loss_att=48.657, acc=0.714, loss=53.613, backward_time=1.028, grad_norm=106.251, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.162e-05, train_time=2.737 +[gpub007:0/64] 2023-07-11 00:14:27,077 (trainer:732) INFO: 34epoch:train:9301-9400batch: iter_time=1.217e-04, forward_time=0.145, loss_ctc=64.710, loss_att=49.874, acc=0.699, loss=54.325, backward_time=1.028, grad_norm=107.263, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.161e-05, train_time=2.751 +[gpub007:0/64] 2023-07-11 00:16:48,717 (trainer:732) INFO: 34epoch:train:9401-9500batch: iter_time=1.199e-04, forward_time=0.146, loss_ctc=65.829, loss_att=48.090, acc=0.712, loss=53.411, backward_time=1.050, grad_norm=141.473, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.160e-05, train_time=2.833 +[gpub007:0/64] 2023-07-11 00:19:09,415 (trainer:732) INFO: 34epoch:train:9501-9600batch: iter_time=1.246e-04, forward_time=0.145, loss_ctc=70.534, loss_att=55.635, acc=0.695, loss=60.105, backward_time=1.031, grad_norm=134.685, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.159e-05, train_time=2.814 +[gpub007:0/64] 2023-07-11 00:21:25,823 (trainer:732) INFO: 34epoch:train:9601-9700batch: iter_time=1.231e-04, forward_time=0.146, loss_ctc=77.555, loss_att=56.786, acc=0.703, loss=63.017, backward_time=1.027, grad_norm=114.296, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.158e-05, train_time=2.728 +[gpub007:0/64] 
2023-07-11 00:23:49,641 (trainer:732) INFO: 34epoch:train:9701-9800batch: iter_time=1.212e-04, forward_time=0.146, loss_ctc=68.079, loss_att=51.578, acc=0.715, loss=56.528, backward_time=1.040, grad_norm=99.424, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.157e-05, train_time=2.876 +[gpub007:0/64] 2023-07-11 00:26:15,750 (trainer:732) INFO: 34epoch:train:9801-9900batch: iter_time=1.206e-04, forward_time=0.145, loss_ctc=77.070, loss_att=55.145, acc=0.712, loss=61.723, backward_time=1.048, grad_norm=113.370, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.156e-05, train_time=2.922 +[gpub007:0/64] 2023-07-11 00:28:31,403 (trainer:732) INFO: 34epoch:train:9901-10000batch: iter_time=1.101e-04, forward_time=0.145, loss_ctc=64.300, loss_att=53.015, acc=0.707, loss=56.400, backward_time=1.028, grad_norm=109.344, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.155e-05, train_time=2.713 +[gpub007:0/64] 2023-07-11 00:41:09,414 (trainer:338) INFO: 34epoch results: [train] iter_time=0.226, forward_time=0.147, loss_ctc=70.190, loss_att=52.756, acc=0.709, loss=57.986, backward_time=1.032, grad_norm=111.980, clip=100.000, loss_scale=1.585e+29, optim_step_time=0.182, optim0_lr0=6.202e-05, train_time=3.374, time=4 hours, 41 minutes and 22.59 seconds, total_count=310000, gpu_max_cached_mem_GB=37.219, [valid] loss_ctc=44.610, cer_ctc=0.262, loss_att=39.048, acc=0.662, cer=0.431, wer=1.000, loss=40.717, time=6 minutes and 29.51 seconds, total_count=31878, gpu_max_cached_mem_GB=37.219, [att_plot] time=5 minutes and 56.54 seconds, total_count=0, gpu_max_cached_mem_GB=37.219 +[gpub007:0/64] 2023-07-11 00:41:24,676 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpub007:0/64] 2023-07-11 00:41:24,684 (trainer:272) INFO: 35/50epoch started. Estimated time to finish: 3 days, 5 hours and 45 minutes +[gpub007:0/64] 2023-07-11 00:41:24,687 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub007:0/64] 2023-07-11 00:41:42,573 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 00:41:46,222 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 00:41:46,222 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub007:0/64] 2023-07-11 00:41:46,243 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 00:45:55,455 (trainer:732) INFO: 35epoch:train:1-100batch: iter_time=1.294, forward_time=0.145, loss_ctc=71.057, loss_att=58.298, acc=0.680, loss=62.126, backward_time=1.041, grad_norm=105.827, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.183, optim0_lr0=6.154e-05, train_time=5.415 +[gpub007:0/64] 2023-07-11 00:48:11,981 (trainer:732) INFO: 35epoch:train:101-200batch: iter_time=1.232e-04, forward_time=0.149, loss_ctc=65.096, loss_att=50.184, acc=0.701, loss=54.658, backward_time=1.031, grad_norm=101.520, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.153e-05, train_time=2.730 +[gpub007:0/64] 2023-07-11 00:50:28,253 (trainer:732) INFO: 35epoch:train:201-300batch: iter_time=1.219e-04, forward_time=0.144, loss_ctc=79.966, loss_att=67.116, acc=0.694, loss=70.971, backward_time=1.027, grad_norm=126.565, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.183, optim0_lr0=6.152e-05, train_time=2.725 +[gpub007:0/64] 2023-07-11 00:52:46,612 (trainer:732) INFO: 35epoch:train:301-400batch: iter_time=1.065e-04, forward_time=0.165, loss_ctc=81.643, loss_att=61.327, acc=0.691, loss=67.422, backward_time=1.033, grad_norm=183.099, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.184, optim0_lr0=6.151e-05, train_time=2.766 +[gpub007:0/64] 2023-07-11 00:55:03,601 (trainer:732) INFO: 35epoch:train:401-500batch: iter_time=1.256e-04, forward_time=0.145, loss_ctc=59.117, loss_att=40.342, acc=0.727, loss=45.975, backward_time=1.026, grad_norm=107.675, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.183, optim0_lr0=6.150e-05, train_time=2.741 +[gpub007:0/64] 2023-07-11 00:57:22,105 (trainer:732) INFO: 35epoch:train:501-600batch: iter_time=1.049e-04, forward_time=0.156, loss_ctc=72.909, loss_att=59.163, acc=0.693, loss=63.287, backward_time=1.030, grad_norm=104.951, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.149e-05, train_time=2.770 +[gpub007:0/64] 2023-07-11 01:00:03,445 (trainer:732) INFO: 35epoch:train:601-700batch: iter_time=6.204e-04, forward_time=0.224, loss_ctc=68.252, loss_att=50.145, acc=0.704, loss=55.577, backward_time=1.068, grad_norm=120.151, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.185, optim0_lr0=6.148e-05, train_time=3.227 +[gpub007:0/64] 2023-07-11 01:02:19,556 (trainer:732) INFO: 35epoch:train:701-800batch: iter_time=1.048e-04, forward_time=0.145, loss_ctc=69.526, loss_att=49.297, acc=0.715, loss=55.366, backward_time=1.029, grad_norm=101.080, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.148e-05, 
train_time=2.722 +[gpub007:0/64] 2023-07-11 01:03:11,525 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub007:0/64] 2023-07-11 01:03:29,024 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 01:03:32,635 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 01:03:32,635 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub007:0/64] 2023-07-11 01:03:32,642 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 01:07:39,723 (trainer:732) INFO: 35epoch:train:801-900batch: iter_time=1.439, forward_time=0.144, loss_ctc=73.510, loss_att=56.228, acc=0.691, loss=61.413, backward_time=1.043, grad_norm=116.311, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.147e-05, train_time=6.403 +[gpub007:0/64] 2023-07-11 01:09:56,084 (trainer:732) INFO: 35epoch:train:901-1000batch: iter_time=1.304e-04, forward_time=0.144, loss_ctc=67.649, loss_att=52.830, acc=0.706, loss=57.276, backward_time=1.028, grad_norm=96.165, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.146e-05, train_time=2.727 +[gpub007:0/64] 2023-07-11 01:12:12,182 (trainer:732) INFO: 35epoch:train:1001-1100batch: iter_time=1.215e-04, forward_time=0.144, loss_ctc=76.518, loss_att=62.802, acc=0.713, loss=66.917, backward_time=1.029, grad_norm=129.549, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.145e-05, train_time=2.722 +[gpub007:0/64] 2023-07-11 01:14:28,310 (trainer:732) INFO: 35epoch:train:1101-1200batch: iter_time=1.289e-04, forward_time=0.145, loss_ctc=80.484, loss_att=59.491, acc=0.703, loss=65.789, backward_time=1.029, grad_norm=135.495, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.144e-05, train_time=2.722 +[gpub007:0/64] 2023-07-11 01:16:44,028 (trainer:732) INFO: 35epoch:train:1201-1300batch: iter_time=1.366e-04, forward_time=0.146, loss_ctc=59.011, loss_att=42.280, acc=0.722, loss=47.299, backward_time=1.029, grad_norm=104.603, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.183, optim0_lr0=6.143e-05, train_time=2.714 +[gpub007:0/64] 2023-07-11 01:18:59,727 (trainer:732) INFO: 35epoch:train:1301-1400batch: iter_time=1.301e-04, forward_time=0.145, loss_ctc=70.182, loss_att=57.012, acc=0.705, loss=60.963, backward_time=1.027, grad_norm=123.304, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.142e-05, train_time=2.714 +[gpub007:0/64] 2023-07-11 01:21:15,380 (trainer:732) INFO: 35epoch:train:1401-1500batch: iter_time=1.227e-04, forward_time=0.145, loss_ctc=68.956, loss_att=51.980, acc=0.710, loss=57.073, backward_time=1.028, grad_norm=104.932, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.141e-05, train_time=2.713 +[gpub007:0/64] 2023-07-11 01:23:30,905 (trainer:732) INFO: 35epoch:train:1501-1600batch: iter_time=1.288e-04, forward_time=0.145, loss_ctc=68.553, loss_att=49.058, acc=0.721, 
loss=54.907, backward_time=1.027, grad_norm=121.556, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.140e-05, train_time=2.710 +[gpub007:0/64] 2023-07-11 01:25:09,859 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub007:0/64] 2023-07-11 01:25:28,310 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 01:25:32,079 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 01:25:32,079 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub007:0/64] 2023-07-11 01:25:32,085 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 01:29:32,231 (trainer:732) INFO: 35epoch:train:1601-1700batch: iter_time=2.182, forward_time=0.155, loss_ctc=71.780, loss_att=57.867, acc=0.690, loss=62.041, backward_time=1.042, grad_norm=134.284, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.139e-05, train_time=7.226 +[gpub007:0/64] 2023-07-11 01:31:48,376 (trainer:732) INFO: 35epoch:train:1701-1800batch: iter_time=1.265e-04, forward_time=0.143, loss_ctc=72.173, loss_att=57.303, acc=0.692, loss=61.764, backward_time=1.029, grad_norm=136.674, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.138e-05, train_time=2.723 +[gpub007:0/64] 2023-07-11 01:34:04,193 (trainer:732) INFO: 35epoch:train:1801-1900batch: iter_time=1.266e-04, forward_time=0.145, loss_ctc=66.889, loss_att=57.530, acc=0.701, loss=60.338, backward_time=1.028, grad_norm=107.858, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.137e-05, train_time=2.716 +[gpub007:0/64] 2023-07-11 01:36:20,105 (trainer:732) INFO: 35epoch:train:1901-2000batch: iter_time=1.216e-04, forward_time=0.145, loss_ctc=79.736, loss_att=62.074, acc=0.696, loss=67.372, backward_time=1.029, grad_norm=133.530, clip=100.000, loss_scale=3.169e+29, optim_step_time=0.182, optim0_lr0=6.136e-05, train_time=2.718 +[gpub007:0/64] 2023-07-11 01:38:35,772 (trainer:732) INFO: 35epoch:train:2001-2100batch: iter_time=1.155e-04, forward_time=0.145, loss_ctc=69.250, loss_att=48.513, acc=0.719, loss=54.734, backward_time=1.027, grad_norm=105.912, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.136e-05, train_time=2.713 +[gpub007:0/64] 2023-07-11 01:40:51,437 (trainer:732) INFO: 35epoch:train:2101-2200batch: iter_time=1.247e-04, forward_time=0.145, loss_ctc=61.290, loss_att=43.960, acc=0.730, loss=49.159, backward_time=1.028, grad_norm=91.948, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.135e-05, train_time=2.713 +[gpub007:0/64] 2023-07-11 01:43:07,740 (trainer:732) INFO: 35epoch:train:2201-2300batch: iter_time=1.224e-04, forward_time=0.147, loss_ctc=73.331, loss_att=60.271, acc=0.683, loss=64.189, backward_time=1.032, grad_norm=122.389, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.134e-05, train_time=2.726 +[gpub007:0/64] 2023-07-11 01:45:23,324 
(trainer:732) INFO: 35epoch:train:2301-2400batch: iter_time=1.189e-04, forward_time=0.145, loss_ctc=67.864, loss_att=46.445, acc=0.726, loss=52.871, backward_time=1.027, grad_norm=99.322, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.133e-05, train_time=2.711 +[gpub007:0/64] 2023-07-11 01:47:39,556 (trainer:732) INFO: 35epoch:train:2401-2500batch: iter_time=1.163e-04, forward_time=0.145, loss_ctc=69.151, loss_att=50.577, acc=0.703, loss=56.149, backward_time=1.029, grad_norm=98.676, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.132e-05, train_time=2.724 +[gpub007:0/64] 2023-07-11 01:47:42,508 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub007:0/64] 2023-07-11 01:48:00,725 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 01:48:04,464 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 01:48:04,464 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub007:0/64] 2023-07-11 01:48:04,470 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 01:53:47,387 (trainer:732) INFO: 35epoch:train:2501-2600batch: iter_time=1.328, forward_time=0.146, loss_ctc=71.851, loss_att=57.620, acc=0.692, loss=61.889, backward_time=1.038, grad_norm=135.777, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.131e-05, train_time=7.356 +[gpub007:0/64] 2023-07-11 01:56:03,249 (trainer:732) INFO: 35epoch:train:2601-2700batch: iter_time=1.240e-04, forward_time=0.146, loss_ctc=67.460, loss_att=57.145, acc=0.701, loss=60.240, backward_time=1.028, grad_norm=109.035, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.130e-05, train_time=2.717 +[gpub007:0/64] 2023-07-11 01:58:18,808 (trainer:732) INFO: 35epoch:train:2701-2800batch: iter_time=1.425e-04, forward_time=0.145, loss_ctc=79.815, loss_att=61.445, acc=0.698, loss=66.956, backward_time=1.027, grad_norm=135.531, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.129e-05, train_time=2.711 +[gpub007:0/64] 2023-07-11 02:00:34,368 (trainer:732) INFO: 35epoch:train:2801-2900batch: iter_time=1.466e-04, forward_time=0.146, loss_ctc=69.560, loss_att=49.257, acc=0.717, loss=55.348, backward_time=1.027, grad_norm=106.765, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.128e-05, train_time=2.711 +[gpub007:0/64] 2023-07-11 02:02:49,793 (trainer:732) INFO: 35epoch:train:2901-3000batch: iter_time=1.322e-04, forward_time=0.145, loss_ctc=63.448, loss_att=45.096, acc=0.725, loss=50.602, backward_time=1.026, grad_norm=99.059, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.127e-05, train_time=2.708 +[gpub007:0/64] 2023-07-11 02:05:05,743 (trainer:732) INFO: 35epoch:train:3001-3100batch: iter_time=1.412e-04, forward_time=0.146, loss_ctc=71.396, loss_att=59.928, acc=0.682, loss=63.368, backward_time=1.030, grad_norm=107.658, 
clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.126e-05, train_time=2.719 +[gpub007:0/64] 2023-07-11 02:07:21,375 (trainer:732) INFO: 35epoch:train:3101-3200batch: iter_time=1.381e-04, forward_time=0.146, loss_ctc=66.404, loss_att=45.922, acc=0.727, loss=52.067, backward_time=1.026, grad_norm=129.685, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.125e-05, train_time=2.712 +[gpub007:0/64] 2023-07-11 02:09:37,025 (trainer:732) INFO: 35epoch:train:3201-3300batch: iter_time=1.215e-04, forward_time=0.146, loss_ctc=67.807, loss_att=48.720, acc=0.712, loss=54.446, backward_time=1.027, grad_norm=88.566, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.124e-05, train_time=2.713 +[gpub007:0/64] 2023-07-11 02:10:24,930 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub007:0/64] 2023-07-11 02:10:42,827 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 02:10:46,529 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 02:10:46,529 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub007:0/64] 2023-07-11 02:10:46,536 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 02:15:46,907 (trainer:732) INFO: 35epoch:train:3301-3400batch: iter_time=1.267, forward_time=0.145, loss_ctc=74.487, loss_att=61.332, acc=0.687, loss=65.278, backward_time=1.054, grad_norm=147.561, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.124e-05, train_time=7.397 +[gpub007:0/64] 2023-07-11 02:18:02,805 (trainer:732) INFO: 35epoch:train:3401-3500batch: iter_time=1.326e-04, forward_time=0.146, loss_ctc=64.728, loss_att=49.894, acc=0.720, loss=54.344, backward_time=1.028, grad_norm=102.780, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.123e-05, train_time=2.718 +[gpub007:0/64] 2023-07-11 02:20:19,359 (trainer:732) INFO: 35epoch:train:3501-3600batch: iter_time=1.329e-04, forward_time=0.148, loss_ctc=71.956, loss_att=62.087, acc=0.708, loss=65.048, backward_time=1.032, grad_norm=99.123, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.122e-05, train_time=2.731 +[gpub007:0/64] 2023-07-11 02:22:35,236 (trainer:732) INFO: 35epoch:train:3601-3700batch: iter_time=1.430e-04, forward_time=0.147, loss_ctc=73.577, loss_att=55.248, acc=0.715, loss=60.747, backward_time=1.028, grad_norm=116.857, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.121e-05, train_time=2.717 +[gpub007:0/64] 2023-07-11 02:24:50,900 (trainer:732) INFO: 35epoch:train:3701-3800batch: iter_time=1.092e-04, forward_time=0.145, loss_ctc=68.778, loss_att=48.953, acc=0.719, loss=54.901, backward_time=1.028, grad_norm=103.728, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.120e-05, train_time=2.713 +[gpub007:0/64] 2023-07-11 02:27:06,580 (trainer:732) INFO: 35epoch:train:3801-3900batch: 
iter_time=9.491e-05, forward_time=0.144, loss_ctc=67.110, loss_att=48.630, acc=0.717, loss=54.174, backward_time=1.028, grad_norm=108.396, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.119e-05, train_time=2.713 +[gpub007:0/64] 2023-07-11 02:29:22,404 (trainer:732) INFO: 35epoch:train:3901-4000batch: iter_time=1.010e-04, forward_time=0.144, loss_ctc=70.506, loss_att=58.358, acc=0.704, loss=62.003, backward_time=1.029, grad_norm=100.430, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.118e-05, train_time=2.716 +[gpub007:0/64] 2023-07-11 02:31:38,118 (trainer:732) INFO: 35epoch:train:4001-4100batch: iter_time=1.251e-04, forward_time=0.147, loss_ctc=66.568, loss_att=47.477, acc=0.724, loss=53.204, backward_time=1.027, grad_norm=101.728, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.117e-05, train_time=2.714 +[gpub007:0/64] 2023-07-11 02:33:10,343 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub007:0/64] 2023-07-11 02:33:28,575 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 02:33:32,026 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 02:33:32,026 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub007:0/64] 2023-07-11 02:33:32,032 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 02:36:40,228 (trainer:732) INFO: 35epoch:train:4101-4200batch: iter_time=1.286, forward_time=0.146, loss_ctc=71.481, loss_att=56.412, acc=0.697, loss=60.933, backward_time=1.044, grad_norm=119.430, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.116e-05, train_time=6.042 +[gpub007:0/64] 2023-07-11 02:38:56,733 (trainer:732) INFO: 35epoch:train:4201-4300batch: iter_time=1.221e-04, forward_time=0.147, loss_ctc=64.522, loss_att=49.545, acc=0.718, loss=54.038, backward_time=1.028, grad_norm=104.859, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.115e-05, train_time=2.730 +[gpub007:0/64] 2023-07-11 02:41:12,944 (trainer:732) INFO: 35epoch:train:4301-4400batch: iter_time=1.277e-04, forward_time=0.145, loss_ctc=71.735, loss_att=62.122, acc=0.706, loss=65.006, backward_time=1.029, grad_norm=129.940, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.114e-05, train_time=2.724 +[gpub007:0/64] 2023-07-11 02:43:29,074 (trainer:732) INFO: 35epoch:train:4401-4500batch: iter_time=1.249e-04, forward_time=0.147, loss_ctc=73.693, loss_att=57.858, acc=0.713, loss=62.609, backward_time=1.030, grad_norm=135.338, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.113e-05, train_time=2.722 +[gpub007:0/64] 2023-07-11 02:45:45,736 (trainer:732) INFO: 35epoch:train:4501-4600batch: iter_time=1.224e-04, forward_time=0.146, loss_ctc=68.255, loss_att=46.938, acc=0.721, loss=53.333, backward_time=1.031, grad_norm=102.361, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.183, 
optim0_lr0=6.113e-05, train_time=2.733 +[gpub007:0/64] 2023-07-11 02:48:01,424 (trainer:732) INFO: 35epoch:train:4601-4700batch: iter_time=1.406e-04, forward_time=0.147, loss_ctc=66.285, loss_att=48.356, acc=0.719, loss=53.735, backward_time=1.027, grad_norm=123.813, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.183, optim0_lr0=6.112e-05, train_time=2.714 +[gpub007:0/64] 2023-07-11 02:50:17,383 (trainer:732) INFO: 35epoch:train:4701-4800batch: iter_time=1.240e-04, forward_time=0.147, loss_ctc=70.652, loss_att=58.125, acc=0.705, loss=61.883, backward_time=1.029, grad_norm=112.224, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.183, optim0_lr0=6.111e-05, train_time=2.719 +[gpub007:0/64] 2023-07-11 02:52:33,237 (trainer:732) INFO: 35epoch:train:4801-4900batch: iter_time=1.285e-04, forward_time=0.147, loss_ctc=66.220, loss_att=47.069, acc=0.723, loss=52.814, backward_time=1.030, grad_norm=96.331, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.110e-05, train_time=2.717 +[gpub007:0/64] 2023-07-11 02:54:49,141 (trainer:732) INFO: 35epoch:train:4901-5000batch: iter_time=1.119e-04, forward_time=0.146, loss_ctc=74.509, loss_att=56.995, acc=0.707, loss=62.249, backward_time=1.029, grad_norm=103.038, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.109e-05, train_time=2.718 +[gpub007:0/64] 2023-07-11 02:54:52,148 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub007:0/64] 2023-07-11 02:55:10,392 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 02:55:13,906 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 02:55:13,906 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub007:0/64] 2023-07-11 02:55:13,913 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 03:00:01,615 (trainer:732) INFO: 35epoch:train:5001-5100batch: iter_time=1.327, forward_time=0.146, loss_ctc=71.986, loss_att=58.396, acc=0.695, loss=62.473, backward_time=1.042, grad_norm=124.376, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.108e-05, train_time=6.249 +[gpub007:0/64] 2023-07-11 03:02:17,312 (trainer:732) INFO: 35epoch:train:5101-5200batch: iter_time=1.390e-04, forward_time=0.146, loss_ctc=67.031, loss_att=57.719, acc=0.701, loss=60.513, backward_time=1.027, grad_norm=100.455, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.107e-05, train_time=2.714 +[gpub007:0/64] 2023-07-11 03:04:32,968 (trainer:732) INFO: 35epoch:train:5201-5300batch: iter_time=1.174e-04, forward_time=0.146, loss_ctc=79.778, loss_att=61.892, acc=0.698, loss=67.258, backward_time=1.028, grad_norm=114.980, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.106e-05, train_time=2.713 +[gpub007:0/64] 2023-07-11 03:06:48,744 (trainer:732) INFO: 35epoch:train:5301-5400batch: iter_time=1.211e-04, forward_time=0.148, loss_ctc=66.989, loss_att=47.658, 
acc=0.724, loss=53.458, backward_time=1.028, grad_norm=104.013, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.105e-05, train_time=2.715 +[gpub007:0/64] 2023-07-11 03:09:04,095 (trainer:732) INFO: 35epoch:train:5401-5500batch: iter_time=1.338e-04, forward_time=0.146, loss_ctc=60.910, loss_att=44.817, acc=0.727, loss=49.645, backward_time=1.026, grad_norm=127.532, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.104e-05, train_time=2.707 +[gpub007:0/64] 2023-07-11 03:11:21,842 (trainer:732) INFO: 35epoch:train:5501-5600batch: iter_time=1.336e-04, forward_time=0.146, loss_ctc=71.648, loss_att=59.847, acc=0.684, loss=63.387, backward_time=1.027, grad_norm=118.805, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.103e-05, train_time=2.755 +[gpub007:0/64] 2023-07-11 03:13:37,919 (trainer:732) INFO: 35epoch:train:5601-5700batch: iter_time=1.208e-04, forward_time=0.146, loss_ctc=66.025, loss_att=45.866, acc=0.725, loss=51.914, backward_time=1.027, grad_norm=97.475, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.103e-05, train_time=2.721 +[gpub007:0/64] 2023-07-11 03:15:53,756 (trainer:732) INFO: 35epoch:train:5701-5800batch: iter_time=1.177e-04, forward_time=0.144, loss_ctc=69.656, loss_att=49.274, acc=0.710, loss=55.389, backward_time=1.028, grad_norm=99.336, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.102e-05, train_time=2.717 +[gpub007:0/64] 2023-07-11 03:16:42,544 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub007:0/64] 2023-07-11 03:17:00,448 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 03:17:03,895 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 03:17:03,895 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub007:0/64] 2023-07-11 03:17:03,901 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 03:23:45,879 (trainer:732) INFO: 35epoch:train:5801-5900batch: iter_time=1.271, forward_time=0.166, loss_ctc=74.619, loss_att=61.761, acc=0.688, loss=65.619, backward_time=1.044, grad_norm=127.900, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.183, optim0_lr0=6.101e-05, train_time=9.442 +[gpub007:0/64] 2023-07-11 03:26:02,073 (trainer:732) INFO: 35epoch:train:5901-6000batch: iter_time=1.109e-04, forward_time=0.146, loss_ctc=65.728, loss_att=52.202, acc=0.712, loss=56.260, backward_time=1.028, grad_norm=107.061, clip=100.000, loss_scale=6.338e+29, optim_step_time=0.182, optim0_lr0=6.100e-05, train_time=2.724 +[gpub007:0/64] 2023-07-11 03:28:18,432 (trainer:732) INFO: 35epoch:train:6001-6100batch: iter_time=1.051e-04, forward_time=0.145, loss_ctc=72.553, loss_att=63.561, acc=0.703, loss=66.258, backward_time=1.030, grad_norm=112.176, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.099e-05, train_time=2.727 +[gpub007:0/64] 2023-07-11 03:30:36,801 
(trainer:732) INFO: 35epoch:train:6101-6200batch: iter_time=1.182e-04, forward_time=0.145, loss_ctc=72.426, loss_att=55.205, acc=0.715, loss=60.371, backward_time=1.029, grad_norm=114.239, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.098e-05, train_time=2.767 +[gpub007:0/64] 2023-07-11 03:32:52,419 (trainer:732) INFO: 35epoch:train:6201-6300batch: iter_time=1.261e-04, forward_time=0.145, loss_ctc=67.725, loss_att=48.404, acc=0.722, loss=54.200, backward_time=1.027, grad_norm=115.081, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.097e-05, train_time=2.712 +[gpub007:0/64] 2023-07-11 03:35:08,194 (trainer:732) INFO: 35epoch:train:6301-6400batch: iter_time=1.037e-04, forward_time=0.145, loss_ctc=65.164, loss_att=46.345, acc=0.723, loss=51.991, backward_time=1.027, grad_norm=111.594, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.181, optim0_lr0=6.096e-05, train_time=2.715 +[gpub007:0/64] 2023-07-11 03:37:24,211 (trainer:732) INFO: 35epoch:train:6401-6500batch: iter_time=1.126e-04, forward_time=0.145, loss_ctc=71.981, loss_att=59.921, acc=0.706, loss=63.539, backward_time=1.029, grad_norm=137.257, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.181, optim0_lr0=6.095e-05, train_time=2.720 +[gpub007:0/64] 2023-07-11 03:39:48,608 (trainer:732) INFO: 35epoch:train:6501-6600batch: iter_time=1.105e-04, forward_time=0.145, loss_ctc=65.220, loss_att=44.876, acc=0.731, loss=50.979, backward_time=1.037, grad_norm=106.366, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.181, optim0_lr0=6.094e-05, train_time=2.888 +[gpub007:0/64] 2023-07-11 03:41:34,779 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub007:0/64] 2023-07-11 03:41:53,179 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 03:41:56,570 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 03:41:56,570 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub007:0/64] 2023-07-11 03:41:56,576 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 03:47:08,821 (trainer:732) INFO: 35epoch:train:6601-6700batch: iter_time=1.269, forward_time=0.145, loss_ctc=65.010, loss_att=50.773, acc=0.703, loss=55.044, backward_time=1.061, grad_norm=98.507, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.093e-05, train_time=8.804 +[gpub007:0/64] 2023-07-11 03:49:25,568 (trainer:732) INFO: 35epoch:train:6701-6800batch: iter_time=1.340e-04, forward_time=0.146, loss_ctc=69.659, loss_att=57.098, acc=0.700, loss=60.867, backward_time=1.033, grad_norm=107.049, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.093e-05, train_time=2.735 +[gpub007:0/64] 2023-07-11 03:51:41,501 (trainer:732) INFO: 35epoch:train:6801-6900batch: iter_time=1.224e-04, forward_time=0.145, loss_ctc=66.918, loss_att=56.542, acc=0.704, loss=59.655, backward_time=1.026, grad_norm=111.766, clip=100.000, 
loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.092e-05, train_time=2.718 +[gpub007:0/64] 2023-07-11 03:53:57,200 (trainer:732) INFO: 35epoch:train:6901-7000batch: iter_time=1.077e-04, forward_time=0.144, loss_ctc=76.672, loss_att=58.767, acc=0.703, loss=64.139, backward_time=1.025, grad_norm=135.940, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.091e-05, train_time=2.714 +[gpub007:0/64] 2023-07-11 03:56:12,939 (trainer:732) INFO: 35epoch:train:7001-7100batch: iter_time=1.367e-04, forward_time=0.145, loss_ctc=67.771, loss_att=49.998, acc=0.721, loss=55.330, backward_time=1.027, grad_norm=116.335, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.090e-05, train_time=2.715 +[gpub007:0/64] 2023-07-11 03:58:28,536 (trainer:732) INFO: 35epoch:train:7101-7200batch: iter_time=1.583e-04, forward_time=0.146, loss_ctc=60.932, loss_att=43.590, acc=0.730, loss=48.792, backward_time=1.027, grad_norm=117.125, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.089e-05, train_time=2.712 +[gpub007:0/64] 2023-07-11 04:00:44,414 (trainer:732) INFO: 35epoch:train:7201-7300batch: iter_time=1.382e-04, forward_time=0.147, loss_ctc=70.503, loss_att=59.078, acc=0.691, loss=62.506, backward_time=1.028, grad_norm=134.477, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.088e-05, train_time=2.717 +[gpub007:0/64] 2023-07-11 04:03:00,131 (trainer:732) INFO: 35epoch:train:7301-7400batch: iter_time=1.163e-04, forward_time=0.145, loss_ctc=65.698, loss_att=45.779, acc=0.725, loss=51.755, backward_time=1.027, grad_norm=139.588, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.087e-05, train_time=2.714 +[gpub007:0/64] 2023-07-11 04:05:15,774 (trainer:732) INFO: 35epoch:train:7401-7500batch: iter_time=1.102e-04, forward_time=0.144, loss_ctc=68.809, loss_att=49.404, acc=0.712, loss=55.225, backward_time=1.026, grad_norm=126.521, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.086e-05, train_time=2.713 +[gpub007:0/64] 2023-07-11 04:05:18,451 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub007:0/64] 2023-07-11 04:05:36,460 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 04:05:39,891 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 04:05:39,892 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub007:0/64] 2023-07-11 04:05:39,898 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 04:12:39,450 (trainer:732) INFO: 35epoch:train:7501-7600batch: iter_time=1.257, forward_time=0.146, loss_ctc=66.129, loss_att=55.947, acc=0.694, loss=59.002, backward_time=1.042, grad_norm=109.742, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.085e-05, train_time=8.873 +[gpub007:0/64] 2023-07-11 04:14:55,636 (trainer:732) INFO: 35epoch:train:7601-7700batch: iter_time=1.305e-04, forward_time=0.145, loss_ctc=64.633, loss_att=48.882, acc=0.709, loss=53.607, backward_time=1.027, grad_norm=95.094, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.084e-05, train_time=2.724 +[gpub007:0/64] 2023-07-11 04:17:19,919 (trainer:732) INFO: 35epoch:train:7701-7800batch: iter_time=1.123e-04, forward_time=0.146, loss_ctc=76.885, loss_att=63.661, acc=0.710, loss=67.628, backward_time=1.049, grad_norm=104.271, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.084e-05, train_time=2.885 +[gpub007:0/64] 2023-07-11 04:19:36,871 (trainer:732) INFO: 35epoch:train:7801-7900batch: iter_time=1.249e-04, forward_time=0.152, loss_ctc=75.513, loss_att=56.761, acc=0.706, loss=62.387, backward_time=1.031, grad_norm=135.305, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.083e-05, train_time=2.739 +[gpub007:0/64] 2023-07-11 04:22:01,593 (trainer:732) INFO: 35epoch:train:7901-8000batch: iter_time=1.314e-04, forward_time=0.146, loss_ctc=59.219, loss_att=40.744, acc=0.730, loss=46.287, backward_time=1.033, grad_norm=94.732, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.082e-05, train_time=2.894 +[gpub007:0/64] 2023-07-11 04:24:17,955 (trainer:732) INFO: 35epoch:train:8001-8100batch: iter_time=1.331e-04, forward_time=0.147, loss_ctc=70.557, loss_att=57.207, acc=0.705, loss=61.212, backward_time=1.030, grad_norm=106.149, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.081e-05, train_time=2.727 +[gpub007:0/64] 2023-07-11 04:26:33,643 (trainer:732) INFO: 35epoch:train:8101-8200batch: iter_time=1.207e-04, forward_time=0.144, loss_ctc=67.643, loss_att=50.416, acc=0.707, loss=55.584, backward_time=1.025, grad_norm=93.448, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.080e-05, train_time=2.714 +[gpub007:0/64] 2023-07-11 04:28:50,730 (trainer:732) INFO: 35epoch:train:8201-8300batch: iter_time=1.128e-04, forward_time=0.156, loss_ctc=67.530, loss_att=48.098, acc=0.723, loss=53.928, backward_time=1.026, grad_norm=100.965, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, 
optim0_lr0=6.079e-05, train_time=2.742 +[gpub007:0/64] 2023-07-11 04:29:45,219 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub007:0/64] 2023-07-11 04:30:02,950 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 04:30:06,302 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 04:30:06,302 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub007:0/64] 2023-07-11 04:30:06,309 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 04:34:27,849 (trainer:732) INFO: 35epoch:train:8301-8400batch: iter_time=1.424, forward_time=0.145, loss_ctc=68.160, loss_att=54.907, acc=0.694, loss=58.883, backward_time=1.057, grad_norm=110.656, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.078e-05, train_time=6.742 +[gpub007:0/64] 2023-07-11 04:36:44,477 (trainer:732) INFO: 35epoch:train:8401-8500batch: iter_time=1.208e-04, forward_time=0.146, loss_ctc=64.626, loss_att=49.754, acc=0.719, loss=54.216, backward_time=1.027, grad_norm=101.741, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.077e-05, train_time=2.732 +[gpub007:0/64] 2023-07-11 04:39:00,962 (trainer:732) INFO: 35epoch:train:8501-8600batch: iter_time=1.282e-04, forward_time=0.146, loss_ctc=72.339, loss_att=62.021, acc=0.709, loss=65.116, backward_time=1.029, grad_norm=105.363, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.076e-05, train_time=2.729 +[gpub007:0/64] 2023-07-11 04:41:19,932 (trainer:732) INFO: 35epoch:train:8601-8700batch: iter_time=1.282e-04, forward_time=0.146, loss_ctc=73.102, loss_att=55.859, acc=0.717, loss=61.032, backward_time=1.042, grad_norm=112.970, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.075e-05, train_time=2.779 +[gpub007:0/64] 2023-07-11 04:43:35,769 (trainer:732) INFO: 35epoch:train:8701-8800batch: iter_time=1.239e-04, forward_time=0.146, loss_ctc=68.061, loss_att=48.807, acc=0.721, loss=54.583, backward_time=1.026, grad_norm=100.965, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.075e-05, train_time=2.716 +[gpub007:0/64] 2023-07-11 04:45:52,213 (trainer:732) INFO: 35epoch:train:8801-8900batch: iter_time=1.283e-04, forward_time=0.146, loss_ctc=65.514, loss_att=48.609, acc=0.722, loss=53.680, backward_time=1.028, grad_norm=121.253, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.074e-05, train_time=2.729 +[gpub007:0/64] 2023-07-11 04:48:09,179 (trainer:732) INFO: 35epoch:train:8901-9000batch: iter_time=1.247e-04, forward_time=0.146, loss_ctc=69.248, loss_att=57.874, acc=0.712, loss=61.286, backward_time=1.031, grad_norm=124.270, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.073e-05, train_time=2.739 +[gpub007:0/64] 2023-07-11 04:50:25,074 (trainer:732) INFO: 35epoch:train:9001-9100batch: iter_time=1.222e-04, forward_time=0.146, loss_ctc=65.436, 
loss_att=45.658, acc=0.729, loss=51.591, backward_time=1.028, grad_norm=102.594, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.072e-05, train_time=2.718 +[gpub007:0/64] 2023-07-11 04:51:57,364 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub007:0/64] 2023-07-11 04:52:15,147 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 04:52:18,798 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 04:52:18,798 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub007:0/64] 2023-07-11 04:52:18,804 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 04:56:08,511 (trainer:732) INFO: 35epoch:train:9101-9200batch: iter_time=1.343, forward_time=0.145, loss_ctc=65.095, loss_att=50.281, acc=0.712, loss=54.725, backward_time=1.037, grad_norm=103.197, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.071e-05, train_time=6.869 +[gpub007:0/64] 2023-07-11 04:58:25,926 (trainer:732) INFO: 35epoch:train:9201-9300batch: iter_time=9.774e-05, forward_time=0.146, loss_ctc=68.834, loss_att=56.585, acc=0.710, loss=60.259, backward_time=1.033, grad_norm=120.324, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.070e-05, train_time=2.748 +[gpub007:0/64] 2023-07-11 05:00:42,775 (trainer:732) INFO: 35epoch:train:9301-9400batch: iter_time=9.473e-05, forward_time=0.146, loss_ctc=66.220, loss_att=54.518, acc=0.715, loss=58.028, backward_time=1.030, grad_norm=109.350, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.069e-05, train_time=2.737 +[gpub007:0/64] 2023-07-11 05:02:59,833 (trainer:732) INFO: 35epoch:train:9401-9500batch: iter_time=1.256e-04, forward_time=0.146, loss_ctc=77.266, loss_att=59.688, acc=0.715, loss=64.961, backward_time=1.029, grad_norm=126.954, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.068e-05, train_time=2.741 +[gpub007:0/64] 2023-07-11 05:05:15,709 (trainer:732) INFO: 35epoch:train:9501-9600batch: iter_time=1.307e-04, forward_time=0.145, loss_ctc=68.145, loss_att=47.616, acc=0.729, loss=53.775, backward_time=1.026, grad_norm=101.105, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.181, optim0_lr0=6.067e-05, train_time=2.717 +[gpub007:0/64] 2023-07-11 05:07:31,651 (trainer:732) INFO: 35epoch:train:9601-9700batch: iter_time=1.342e-04, forward_time=0.147, loss_ctc=60.957, loss_att=45.078, acc=0.729, loss=49.842, backward_time=1.028, grad_norm=104.035, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.066e-05, train_time=2.719 +[gpub007:0/64] 2023-07-11 05:09:47,772 (trainer:732) INFO: 35epoch:train:9701-9800batch: iter_time=1.304e-04, forward_time=0.147, loss_ctc=71.986, loss_att=59.815, acc=0.696, loss=63.466, backward_time=1.029, grad_norm=104.690, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.066e-05, train_time=2.722 +[gpub007:0/64] 
2023-07-11 05:12:03,376 (trainer:732) INFO: 35epoch:train:9801-9900batch: iter_time=1.380e-04, forward_time=0.146, loss_ctc=65.894, loss_att=45.906, acc=0.732, loss=51.903, backward_time=1.027, grad_norm=97.625, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.065e-05, train_time=2.712 +[gpub007:0/64] 2023-07-11 05:14:18,858 (trainer:732) INFO: 35epoch:train:9901-10000batch: iter_time=1.242e-04, forward_time=0.145, loss_ctc=68.837, loss_att=49.337, acc=0.716, loss=55.187, backward_time=1.025, grad_norm=106.684, clip=100.000, loss_scale=1.268e+30, optim_step_time=0.182, optim0_lr0=6.064e-05, train_time=2.709 +[gpub007:0/64] 2023-07-11 05:27:42,494 (trainer:338) INFO: 35epoch results: [train] iter_time=0.167, forward_time=0.147, loss_ctc=69.359, loss_att=53.354, acc=0.710, loss=58.156, backward_time=1.031, grad_norm=113.328, clip=100.000, loss_scale=8.240e+29, optim_step_time=0.182, optim0_lr0=6.109e-05, train_time=3.275, time=4 hours, 33 minutes and 9.74 seconds, total_count=320000, gpu_max_cached_mem_GB=37.219, [valid] loss_ctc=45.896, cer_ctc=0.263, loss_att=40.346, acc=0.659, cer=0.430, wer=1.000, loss=42.011, time=7 minutes and 12.43 seconds, total_count=32890, gpu_max_cached_mem_GB=37.219, [att_plot] time=5 minutes and 55.64 seconds, total_count=0, gpu_max_cached_mem_GB=37.219 +[gpub007:0/64] 2023-07-11 05:27:58,165 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpub007:0/64] 2023-07-11 05:27:58,173 (trainer:272) INFO: 36/50epoch started. Estimated time to finish: 3 days, 38 minutes and 32.8 seconds +[gpub007:0/64] 2023-07-11 05:27:58,177 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpub007:0/64] 2023-07-11 05:28:15,921 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 05:28:19,329 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 05:28:19,329 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub007:0/64] 2023-07-11 05:28:19,335 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 05:32:32,282 (trainer:732) INFO: 36epoch:train:1-100batch: iter_time=1.316, forward_time=0.184, loss_ctc=69.734, loss_att=51.362, acc=0.707, loss=56.874, backward_time=1.037, grad_norm=139.808, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.183, optim0_lr0=6.063e-05, train_time=5.482 +[gpub007:0/64] 2023-07-11 05:34:48,427 (trainer:732) INFO: 36epoch:train:101-200batch: iter_time=1.109e-04, forward_time=0.144, loss_ctc=80.713, loss_att=61.964, acc=0.695, loss=67.589, backward_time=1.029, grad_norm=120.849, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.062e-05, train_time=2.721 +[gpub007:0/64] 2023-07-11 05:37:06,954 (trainer:732) INFO: 36epoch:train:201-300batch: iter_time=1.103e-04, forward_time=0.145, loss_ctc=78.128, loss_att=57.601, acc=0.703, loss=63.759, backward_time=1.029, grad_norm=114.190, clip=100.000, 
loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.061e-05, train_time=2.772 +[gpub007:0/64] 2023-07-11 05:39:23,721 (trainer:732) INFO: 36epoch:train:301-400batch: iter_time=1.063e-04, forward_time=0.146, loss_ctc=77.415, loss_att=56.917, acc=0.706, loss=63.066, backward_time=1.029, grad_norm=111.814, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.060e-05, train_time=2.735 +[gpub007:0/64] 2023-07-11 05:41:39,536 (trainer:732) INFO: 36epoch:train:401-500batch: iter_time=1.089e-04, forward_time=0.145, loss_ctc=73.724, loss_att=55.345, acc=0.711, loss=60.858, backward_time=1.028, grad_norm=107.701, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.059e-05, train_time=2.716 +[gpub007:0/64] 2023-07-11 05:43:58,196 (trainer:732) INFO: 36epoch:train:501-600batch: iter_time=1.238e-04, forward_time=0.150, loss_ctc=78.186, loss_att=58.449, acc=0.692, loss=64.370, backward_time=1.031, grad_norm=130.512, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.058e-05, train_time=2.773 +[gpub007:0/64] 2023-07-11 05:46:21,232 (trainer:732) INFO: 36epoch:train:601-700batch: iter_time=6.214e-04, forward_time=0.163, loss_ctc=75.254, loss_att=57.096, acc=0.719, loss=62.543, backward_time=1.037, grad_norm=146.823, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.184, optim0_lr0=6.058e-05, train_time=2.861 +[gpub007:0/64] 2023-07-11 05:48:40,509 (trainer:732) INFO: 36epoch:train:701-800batch: iter_time=1.389e-04, forward_time=0.147, loss_ctc=80.020, loss_att=50.423, acc=0.711, loss=59.303, backward_time=1.033, grad_norm=145.153, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.183, optim0_lr0=6.057e-05, train_time=2.785 +[gpub007:0/64] 2023-07-11 05:49:34,320 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpub007:0/64] 2023-07-11 05:49:52,099 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 05:49:55,493 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 05:49:55,493 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub007:0/64] 2023-07-11 05:49:55,501 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 05:54:40,956 (trainer:732) INFO: 36epoch:train:801-900batch: iter_time=1.630, forward_time=0.145, loss_ctc=72.961, loss_att=50.532, acc=0.726, loss=57.261, backward_time=1.036, grad_norm=104.979, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.056e-05, train_time=7.209 +[gpub007:0/64] 2023-07-11 05:56:58,421 (trainer:732) INFO: 36epoch:train:901-1000batch: iter_time=1.194e-04, forward_time=0.148, loss_ctc=65.491, loss_att=53.826, acc=0.697, loss=57.326, backward_time=1.031, grad_norm=98.829, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.055e-05, train_time=2.749 +[gpub007:0/64] 2023-07-11 05:59:14,432 (trainer:732) INFO: 36epoch:train:1001-1100batch: iter_time=1.306e-04, forward_time=0.146, loss_ctc=85.174, loss_att=62.650, acc=0.698, loss=69.407, backward_time=1.028, grad_norm=124.007, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.054e-05, train_time=2.720 +[gpub007:0/64] 2023-07-11 06:01:30,579 (trainer:732) INFO: 36epoch:train:1101-1200batch: iter_time=1.190e-04, forward_time=0.146, loss_ctc=73.713, loss_att=53.317, acc=0.711, loss=59.436, backward_time=1.030, grad_norm=124.941, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.183, optim0_lr0=6.053e-05, train_time=2.723 +[gpub007:0/64] 2023-07-11 06:03:46,675 (trainer:732) INFO: 36epoch:train:1201-1300batch: iter_time=1.223e-04, forward_time=0.146, loss_ctc=80.947, loss_att=59.264, acc=0.705, loss=65.769, backward_time=1.030, grad_norm=114.469, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.183, optim0_lr0=6.052e-05, train_time=2.722 +[gpub007:0/64] 2023-07-11 06:06:02,340 (trainer:732) INFO: 36epoch:train:1301-1400batch: iter_time=1.293e-04, forward_time=0.145, loss_ctc=71.894, loss_att=53.652, acc=0.707, loss=59.124, backward_time=1.027, grad_norm=118.071, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.183, optim0_lr0=6.051e-05, train_time=2.713 +[gpub007:0/64] 2023-07-11 06:08:18,168 (trainer:732) INFO: 36epoch:train:1401-1500batch: iter_time=1.226e-04, forward_time=0.145, loss_ctc=75.819, loss_att=58.327, acc=0.708, loss=63.575, backward_time=1.029, grad_norm=113.417, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.050e-05, train_time=2.716 +[gpub007:0/64] 2023-07-11 06:10:33,875 (trainer:732) INFO: 36epoch:train:1501-1600batch: iter_time=1.321e-04, forward_time=0.145, loss_ctc=80.999, loss_att=55.306, acc=0.707, loss=63.014, backward_time=1.028, grad_norm=141.020, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, 
optim0_lr0=6.050e-05, train_time=2.714 +[gpub007:0/64] 2023-07-11 06:12:07,718 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub007:0/64] 2023-07-11 06:12:25,843 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 06:12:29,298 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 06:12:29,298 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub007:0/64] 2023-07-11 06:12:29,304 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 06:15:38,958 (trainer:732) INFO: 36epoch:train:1601-1700batch: iter_time=1.267, forward_time=0.145, loss_ctc=73.839, loss_att=51.751, acc=0.721, loss=58.377, backward_time=1.041, grad_norm=136.783, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.049e-05, train_time=6.101 +[gpub007:0/64] 2023-07-11 06:17:55,585 (trainer:732) INFO: 36epoch:train:1701-1800batch: iter_time=1.134e-04, forward_time=0.147, loss_ctc=71.407, loss_att=52.192, acc=0.710, loss=57.956, backward_time=1.032, grad_norm=120.304, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.048e-05, train_time=2.732 +[gpub007:0/64] 2023-07-11 06:20:12,511 (trainer:732) INFO: 36epoch:train:1801-1900batch: iter_time=1.109e-04, forward_time=0.147, loss_ctc=76.794, loss_att=59.603, acc=0.703, loss=64.761, backward_time=1.033, grad_norm=113.446, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.047e-05, train_time=2.738 +[gpub007:0/64] 2023-07-11 06:22:28,497 (trainer:732) INFO: 36epoch:train:1901-2000batch: iter_time=1.272e-04, forward_time=0.146, loss_ctc=78.621, loss_att=58.117, acc=0.704, loss=64.269, backward_time=1.030, grad_norm=131.981, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.046e-05, train_time=2.719 +[gpub007:0/64] 2023-07-11 06:24:44,371 (trainer:732) INFO: 36epoch:train:2001-2100batch: iter_time=1.327e-04, forward_time=0.146, loss_ctc=74.860, loss_att=55.204, acc=0.705, loss=61.101, backward_time=1.026, grad_norm=125.730, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.045e-05, train_time=2.717 +[gpub007:0/64] 2023-07-11 06:27:00,293 (trainer:732) INFO: 36epoch:train:2101-2200batch: iter_time=1.321e-04, forward_time=0.147, loss_ctc=76.349, loss_att=54.688, acc=0.716, loss=61.186, backward_time=1.028, grad_norm=110.411, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.044e-05, train_time=2.718 +[gpub007:0/64] 2023-07-11 06:29:15,913 (trainer:732) INFO: 36epoch:train:2201-2300batch: iter_time=1.225e-04, forward_time=0.145, loss_ctc=75.174, loss_att=57.150, acc=0.697, loss=62.558, backward_time=1.027, grad_norm=114.348, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.043e-05, train_time=2.712 +[gpub007:0/64] 2023-07-11 06:31:31,591 (trainer:732) INFO: 36epoch:train:2301-2400batch: iter_time=1.299e-04, forward_time=0.145, loss_ctc=73.120, 
loss_att=56.115, acc=0.721, loss=61.216, backward_time=1.027, grad_norm=117.121, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.043e-05, train_time=2.713 +[gpub007:0/64] 2023-07-11 06:33:46,846 (trainer:732) INFO: 36epoch:train:2401-2500batch: iter_time=1.153e-04, forward_time=0.145, loss_ctc=75.214, loss_att=48.631, acc=0.711, loss=56.606, backward_time=1.024, grad_norm=108.186, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.042e-05, train_time=2.705 +[gpub007:0/64] 2023-07-11 06:33:49,488 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub007:0/64] 2023-07-11 06:34:07,811 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 06:34:11,276 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 06:34:11,276 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub007:0/64] 2023-07-11 06:34:11,283 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 06:38:59,026 (trainer:732) INFO: 36epoch:train:2501-2600batch: iter_time=1.329, forward_time=0.189, loss_ctc=76.688, loss_att=57.174, acc=0.711, loss=63.028, backward_time=1.043, grad_norm=146.876, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.184, optim0_lr0=6.041e-05, train_time=6.243 +[gpub007:0/64] 2023-07-11 06:41:14,869 (trainer:732) INFO: 36epoch:train:2601-2700batch: iter_time=1.375e-04, forward_time=0.146, loss_ctc=66.348, loss_att=52.670, acc=0.698, loss=56.774, backward_time=1.028, grad_norm=103.966, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.183, optim0_lr0=6.040e-05, train_time=2.717 +[gpub007:0/64] 2023-07-11 06:43:30,832 (trainer:732) INFO: 36epoch:train:2701-2800batch: iter_time=1.277e-04, forward_time=0.147, loss_ctc=80.891, loss_att=59.854, acc=0.696, loss=66.166, backward_time=1.030, grad_norm=137.370, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.183, optim0_lr0=6.039e-05, train_time=2.719 +[gpub007:0/64] 2023-07-11 06:45:46,669 (trainer:732) INFO: 36epoch:train:2801-2900batch: iter_time=1.504e-04, forward_time=0.147, loss_ctc=75.199, loss_att=54.915, acc=0.700, loss=61.000, backward_time=1.029, grad_norm=118.717, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.183, optim0_lr0=6.038e-05, train_time=2.717 +[gpub007:0/64] 2023-07-11 06:48:02,159 (trainer:732) INFO: 36epoch:train:2901-3000batch: iter_time=1.528e-04, forward_time=0.145, loss_ctc=80.228, loss_att=58.977, acc=0.700, loss=65.352, backward_time=1.027, grad_norm=129.205, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.183, optim0_lr0=6.037e-05, train_time=2.710 +[gpub007:0/64] 2023-07-11 06:50:18,814 (trainer:732) INFO: 36epoch:train:3001-3100batch: iter_time=1.259e-04, forward_time=0.145, loss_ctc=70.750, loss_att=52.963, acc=0.695, loss=58.299, backward_time=1.027, grad_norm=118.744, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.036e-05, train_time=2.733 +[gpub007:0/64] 
2023-07-11 06:52:34,819 (trainer:732) INFO: 36epoch:train:3101-3200batch: iter_time=1.275e-04, forward_time=0.146, loss_ctc=76.995, loss_att=59.563, acc=0.703, loss=64.793, backward_time=1.029, grad_norm=122.292, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.035e-05, train_time=2.720
+[gpub007:0/64] 2023-07-11 06:54:50,442 (trainer:732) INFO: 36epoch:train:3201-3300batch: iter_time=1.269e-04, forward_time=0.145, loss_ctc=74.668, loss_att=50.939, acc=0.720, loss=58.057, backward_time=1.027, grad_norm=106.152, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.035e-05, train_time=2.712
+[gpub007:0/64] 2023-07-11 06:55:38,347 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub007:0/64] 2023-07-11 06:55:56,625 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-11 06:56:00,075 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-11 06:56:00,075 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub007:0/64] 2023-07-11 06:56:00,081 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-11 07:01:43,152 (trainer:732) INFO: 36epoch:train:3301-3400batch: iter_time=1.268, forward_time=0.146, loss_ctc=79.213, loss_att=54.245, acc=0.708, loss=61.735, backward_time=1.041, grad_norm=121.553, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.183, optim0_lr0=6.034e-05, train_time=8.254
+[gpub007:0/64] 2023-07-11 07:03:59,291 (trainer:732) INFO: 36epoch:train:3401-3500batch: iter_time=1.290e-04, forward_time=0.146, loss_ctc=67.393, loss_att=52.270, acc=0.708, loss=56.807, backward_time=1.029, grad_norm=102.671, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.183, optim0_lr0=6.033e-05, train_time=2.723
+[gpub007:0/64] 2023-07-11 07:06:14,976 (trainer:732) INFO: 36epoch:train:3501-3600batch: iter_time=1.278e-04, forward_time=0.146, loss_ctc=76.608, loss_att=55.544, acc=0.695, loss=61.863, backward_time=1.026, grad_norm=119.748, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.032e-05, train_time=2.713
+[gpub007:0/64] 2023-07-11 07:08:30,641 (trainer:732) INFO: 36epoch:train:3601-3700batch: iter_time=1.285e-04, forward_time=0.147, loss_ctc=77.771, loss_att=59.178, acc=0.704, loss=64.756, backward_time=1.027, grad_norm=117.343, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.183, optim0_lr0=6.031e-05, train_time=2.713
+[gpub007:0/64] 2023-07-11 07:10:46,453 (trainer:732) INFO: 36epoch:train:3701-3800batch: iter_time=1.380e-04, forward_time=0.147, loss_ctc=76.208, loss_att=56.389, acc=0.706, loss=62.335, backward_time=1.027, grad_norm=130.722, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.182, optim0_lr0=6.030e-05, train_time=2.716
+[gpub007:0/64] 2023-07-11 07:13:02,225 (trainer:732) INFO: 36epoch:train:3801-3900batch: iter_time=1.256e-04, forward_time=0.147, loss_ctc=73.665, loss_att=51.564, acc=0.706, loss=58.194, backward_time=1.027, grad_norm=145.291, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.183, optim0_lr0=6.029e-05, train_time=2.715
+[gpub007:0/64] 2023-07-11 07:15:20,931 (trainer:732) INFO: 36epoch:train:3901-4000batch: iter_time=1.107e-04, forward_time=0.145, loss_ctc=76.136, loss_att=59.983, acc=0.694, loss=64.829, backward_time=1.030, grad_norm=125.475, clip=100.000, loss_scale=2.535e+30, optim_step_time=0.183, optim0_lr0=6.028e-05, train_time=2.774
+[gpub007:0/64] 2023-07-11 07:17:39,066 (trainer:732) INFO: 36epoch:train:4001-4100batch: iter_time=1.110e-04, forward_time=0.145, loss_ctc=72.278, loss_att=51.631, acc=0.721, loss=57.825, backward_time=1.029, grad_norm=138.892, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.028e-05, train_time=2.762
+[gpub007:0/64] 2023-07-11 07:19:11,306 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub007:0/64] 2023-07-11 07:19:29,501 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-11 07:19:32,917 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-11 07:19:32,917 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub007:0/64] 2023-07-11 07:19:32,924 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-11 07:23:49,717 (trainer:732) INFO: 36epoch:train:4101-4200batch: iter_time=1.258, forward_time=0.145, loss_ctc=74.806, loss_att=47.319, acc=0.718, loss=55.565, backward_time=1.037, grad_norm=121.847, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.027e-05, train_time=7.413
+[gpub007:0/64] 2023-07-11 07:26:05,910 (trainer:732) INFO: 36epoch:train:4201-4300batch: iter_time=1.221e-04, forward_time=0.145, loss_ctc=67.421, loss_att=54.532, acc=0.698, loss=58.398, backward_time=1.027, grad_norm=116.431, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.026e-05, train_time=2.724
+[gpub007:0/64] 2023-07-11 07:28:21,761 (trainer:732) INFO: 36epoch:train:4301-4400batch: iter_time=1.398e-04, forward_time=0.146, loss_ctc=75.972, loss_att=53.682, acc=0.705, loss=60.369, backward_time=1.029, grad_norm=138.981, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.025e-05, train_time=2.717
+[gpub007:0/64] 2023-07-11 07:30:44,763 (trainer:732) INFO: 36epoch:train:4401-4500batch: iter_time=1.359e-04, forward_time=0.146, loss_ctc=77.323, loss_att=58.094, acc=0.705, loss=63.862, backward_time=1.033, grad_norm=123.716, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.024e-05, train_time=2.860
+[gpub007:0/64] 2023-07-11 07:33:00,662 (trainer:732) INFO: 36epoch:train:4501-4600batch: iter_time=1.484e-04, forward_time=0.146, loss_ctc=74.462, loss_att=56.780, acc=0.705, loss=62.084, backward_time=1.028, grad_norm=113.395, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.023e-05, train_time=2.718
+[gpub007:0/64] 2023-07-11 07:35:16,310 (trainer:732) INFO: 36epoch:train:4601-4700batch: iter_time=1.352e-04, forward_time=0.146, loss_ctc=73.043, loss_att=51.398, acc=0.707, loss=57.892, backward_time=1.026, grad_norm=111.405, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.022e-05, train_time=2.713
+[gpub007:0/64] 2023-07-11 07:37:35,085 (trainer:732) INFO: 36epoch:train:4701-4800batch: iter_time=1.391e-04, forward_time=0.146, loss_ctc=77.230, loss_att=60.521, acc=0.690, loss=65.534, backward_time=1.034, grad_norm=150.062, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.021e-05, train_time=2.775
+[gpub007:0/64] 2023-07-11 07:39:52,824 (trainer:732) INFO: 36epoch:train:4801-4900batch: iter_time=1.407e-04, forward_time=0.146, loss_ctc=74.151, loss_att=52.223, acc=0.720, loss=58.801, backward_time=1.029, grad_norm=104.004, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.021e-05, train_time=2.755
+[gpub007:0/64] 2023-07-11 07:42:08,392 (trainer:732) INFO: 36epoch:train:4901-5000batch: iter_time=1.246e-04, forward_time=0.145, loss_ctc=73.016, loss_att=47.706, acc=0.721, loss=55.299, backward_time=1.026, grad_norm=132.437, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.182, optim0_lr0=6.020e-05, train_time=2.711
+[gpub007:0/64] 2023-07-11 07:42:11,107 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub007:0/64] 2023-07-11 07:42:29,209 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-11 07:42:32,641 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-11 07:42:32,641 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub007:0/64] 2023-07-11 07:42:32,647 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-11 07:49:39,825 (trainer:732) INFO: 36epoch:train:5001-5100batch: iter_time=1.267, forward_time=0.146, loss_ctc=78.724, loss_att=57.628, acc=0.719, loss=63.957, backward_time=1.046, grad_norm=119.534, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.019e-05, train_time=9.028
+[gpub007:0/64] 2023-07-11 07:51:56,385 (trainer:732) INFO: 36epoch:train:5101-5200batch: iter_time=1.157e-04, forward_time=0.147, loss_ctc=64.415, loss_att=53.255, acc=0.704, loss=56.603, backward_time=1.029, grad_norm=103.847, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.018e-05, train_time=2.731
+[gpub007:0/64] 2023-07-11 07:54:12,434 (trainer:732) INFO: 36epoch:train:5201-5300batch: iter_time=1.172e-04, forward_time=0.146, loss_ctc=80.164, loss_att=59.913, acc=0.708, loss=65.989, backward_time=1.030, grad_norm=142.835, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.017e-05, train_time=2.721
+[gpub007:0/64] 2023-07-11 07:56:28,335 (trainer:732) INFO: 36epoch:train:5301-5400batch: iter_time=1.131e-04, forward_time=0.146, loss_ctc=76.029, loss_att=54.350, acc=0.712, loss=60.854, backward_time=1.029, grad_norm=131.820, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.016e-05, train_time=2.718
+[gpub007:0/64] 2023-07-11 07:58:44,536 (trainer:732) INFO: 36epoch:train:5401-5500batch: iter_time=1.224e-04, forward_time=0.148, loss_ctc=79.618, loss_att=58.225, acc=0.712, loss=64.643, backward_time=1.032, grad_norm=114.021, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.015e-05, train_time=2.724
+[gpub007:0/64] 2023-07-11 08:01:00,432 (trainer:732) INFO: 36epoch:train:5501-5600batch: iter_time=1.231e-04, forward_time=0.147, loss_ctc=69.623, loss_att=51.254, acc=0.715, loss=56.764, backward_time=1.030, grad_norm=97.814, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.014e-05, train_time=2.718
+[gpub007:0/64] 2023-07-11 08:03:16,345 (trainer:732) INFO: 36epoch:train:5601-5700batch: iter_time=1.173e-04, forward_time=0.147, loss_ctc=74.629, loss_att=59.207, acc=0.711, loss=63.834, backward_time=1.029, grad_norm=137.281, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.014e-05, train_time=2.718
+[gpub007:0/64] 2023-07-11 08:05:31,881 (trainer:732) INFO: 36epoch:train:5701-5800batch: iter_time=1.210e-04, forward_time=0.146, loss_ctc=74.023, loss_att=51.328, acc=0.720, loss=58.136, backward_time=1.027, grad_norm=124.204, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.013e-05, train_time=2.710
+[gpub007:0/64] 2023-07-11 08:06:19,407 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub007:0/64] 2023-07-11 08:06:37,660 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-11 08:06:41,253 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-11 08:06:41,253 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpub007:0/64] 2023-07-11 08:06:41,260 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-11 08:11:40,565 (trainer:732) INFO: 36epoch:train:5801-5900batch: iter_time=1.275, forward_time=0.145, loss_ctc=72.674, loss_att=48.868, acc=0.722, loss=56.010, backward_time=1.038, grad_norm=121.683, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.012e-05, train_time=7.373
+[gpub007:0/64] 2023-07-11 08:13:56,479 (trainer:732) INFO: 36epoch:train:5901-6000batch: iter_time=1.113e-04, forward_time=0.145, loss_ctc=64.741, loss_att=51.696, acc=0.702, loss=55.609, backward_time=1.028, grad_norm=137.426, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.011e-05, train_time=2.718
+[gpub007:0/64] 2023-07-11 08:16:12,398 (trainer:732) INFO: 36epoch:train:6001-6100batch: iter_time=1.097e-04, forward_time=0.145, loss_ctc=84.266, loss_att=61.492, acc=0.695, loss=68.324, backward_time=1.027, grad_norm=129.812, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.010e-05, train_time=2.718
+[gpub007:0/64] 2023-07-11 08:18:31,179 (trainer:732) INFO: 36epoch:train:6101-6200batch: iter_time=1.133e-04, forward_time=0.146, loss_ctc=71.841, loss_att=52.502, acc=0.709, loss=58.303, backward_time=1.029, grad_norm=128.818, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.009e-05, train_time=2.775
+[gpub007:0/64] 2023-07-11 08:20:49,113 (trainer:732) INFO: 36epoch:train:6201-6300batch: iter_time=1.129e-04, forward_time=0.145, loss_ctc=78.640, loss_att=57.297, acc=0.706, loss=63.700, backward_time=1.030, grad_norm=167.589, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.008e-05, train_time=2.758
+[gpub007:0/64] 2023-07-11 08:23:04,818 (trainer:732) INFO: 36epoch:train:6301-6400batch: iter_time=1.104e-04, forward_time=0.146, loss_ctc=70.718, loss_att=51.754, acc=0.705, loss=57.443, backward_time=1.027, grad_norm=105.437, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.008e-05, train_time=2.714
+[gpub007:0/64] 2023-07-11 08:25:20,636 (trainer:732) INFO: 36epoch:train:6401-6500batch: iter_time=1.199e-04, forward_time=0.144, loss_ctc=72.706, loss_att=55.976, acc=0.710, loss=60.995, backward_time=1.028, grad_norm=115.061, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.182, optim0_lr0=6.007e-05, train_time=2.716
+[gpub007:0/64] 2023-07-11 08:27:38,686 (trainer:732) INFO: 36epoch:train:6501-6600batch: iter_time=1.143e-04, forward_time=0.145, loss_ctc=77.437, loss_att=52.920, acc=0.717, loss=60.275, backward_time=1.028, grad_norm=117.364, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.182, optim0_lr0=6.006e-05, train_time=2.761
+[gpub007:0/64] 2023-07-11 08:29:13,934 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub007:0/64] 2023-07-11 08:29:32,525 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-11 08:29:35,949 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-11 08:29:35,949 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub007:0/64] 2023-07-11 08:29:35,955 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-11 08:33:46,466 (trainer:732) INFO: 36epoch:train:6601-6700batch: iter_time=1.284, forward_time=0.145, loss_ctc=72.482, loss_att=51.440, acc=0.725, loss=57.752, backward_time=1.041, grad_norm=109.742, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.182, optim0_lr0=6.005e-05, train_time=7.355
+[gpub007:0/64] 2023-07-11 08:36:02,862 (trainer:732) INFO: 36epoch:train:6701-6800batch: iter_time=1.229e-04, forward_time=0.145, loss_ctc=69.482, loss_att=51.169, acc=0.717, loss=56.663, backward_time=1.030, grad_norm=117.403, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.004e-05, train_time=2.728
+[gpub007:0/64] 2023-07-11 08:38:19,556 (trainer:732) INFO: 36epoch:train:6801-6900batch: iter_time=1.265e-04, forward_time=0.146, loss_ctc=74.186, loss_att=59.903, acc=0.706, loss=64.188, backward_time=1.029, grad_norm=143.346, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.003e-05, train_time=2.734
+[gpub007:0/64] 2023-07-11 08:40:35,268 (trainer:732) INFO: 36epoch:train:6901-7000batch: iter_time=1.167e-04, forward_time=0.146, loss_ctc=77.050, loss_att=55.515, acc=0.713, loss=61.976, backward_time=1.028, grad_norm=124.246, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.002e-05, train_time=2.714
+[gpub007:0/64] 2023-07-11 08:42:51,239 (trainer:732) INFO: 36epoch:train:7001-7100batch: iter_time=1.124e-04, forward_time=0.147, loss_ctc=74.248, loss_att=54.964, acc=0.712, loss=60.749, backward_time=1.029, grad_norm=103.031, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.001e-05, train_time=2.719
+[gpub007:0/64] 2023-07-11 08:45:09,725 (trainer:732) INFO: 36epoch:train:7101-7200batch: iter_time=1.113e-04, forward_time=0.147, loss_ctc=74.611, loss_att=54.431, acc=0.722, loss=60.485, backward_time=1.038, grad_norm=112.301, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.001e-05, train_time=2.769
+[gpub007:0/64] 2023-07-11 08:47:25,551 (trainer:732) INFO: 36epoch:train:7201-7300batch: iter_time=1.169e-04, forward_time=0.147, loss_ctc=73.404, loss_att=55.914, acc=0.706, loss=61.161, backward_time=1.029, grad_norm=114.249, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=6.000e-05, train_time=2.716
+[gpub007:0/64] 2023-07-11 08:49:41,263 (trainer:732) INFO: 36epoch:train:7301-7400batch: iter_time=1.105e-04, forward_time=0.146, loss_ctc=73.601, loss_att=55.992, acc=0.723, loss=61.275, backward_time=1.029, grad_norm=113.768, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=5.999e-05, train_time=2.714
+[gpub007:0/64] 2023-07-11 08:51:56,745 (trainer:732) INFO: 36epoch:train:7401-7500batch: iter_time=1.104e-04, forward_time=0.146, loss_ctc=74.974, loss_att=47.700, acc=0.719, loss=55.882, backward_time=1.027, grad_norm=120.438, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=5.998e-05, train_time=2.709
+[gpub007:0/64] 2023-07-11 08:51:59,462 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub007:0/64] 2023-07-11 08:52:17,396 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-11 08:52:20,832 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-11 08:52:20,832 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub007:0/64] 2023-07-11 08:52:20,838 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-11 08:58:46,396 (trainer:732) INFO: 36epoch:train:7501-7600batch: iter_time=1.294, forward_time=0.175, loss_ctc=75.220, loss_att=54.638, acc=0.725, loss=60.812, backward_time=1.040, grad_norm=124.573, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.182, optim0_lr0=5.997e-05, train_time=8.192
+[gpub007:0/64] 2023-07-11 09:01:03,359 (trainer:732) INFO: 36epoch:train:7601-7700batch: iter_time=1.198e-04, forward_time=0.147, loss_ctc=64.370, loss_att=52.964, acc=0.705, loss=56.386, backward_time=1.032, grad_norm=109.928, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.183, optim0_lr0=5.996e-05, train_time=2.739
+[gpub007:0/64] 2023-07-11 09:03:21,561 (trainer:732) INFO: 36epoch:train:7701-7800batch: iter_time=1.244e-04, forward_time=0.147, loss_ctc=78.666, loss_att=58.239, acc=0.712, loss=64.367, backward_time=1.031, grad_norm=123.469, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.182, optim0_lr0=5.995e-05, train_time=2.764
+[gpub007:0/64] 2023-07-11 09:05:37,601 (trainer:732) INFO: 36epoch:train:7801-7900batch: iter_time=1.233e-04, forward_time=0.146, loss_ctc=74.832, loss_att=53.817, acc=0.714, loss=60.122, backward_time=1.031, grad_norm=105.908, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.182, optim0_lr0=5.995e-05, train_time=2.721
+[gpub007:0/64] 2023-07-11 09:07:53,599 (trainer:732) INFO: 36epoch:train:7901-8000batch: iter_time=1.345e-04, forward_time=0.146, loss_ctc=79.369, loss_att=57.571, acc=0.714, loss=64.110, backward_time=1.031, grad_norm=110.450, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.182, optim0_lr0=5.994e-05, train_time=2.720
+[gpub007:0/64] 2023-07-11 09:10:09,362 (trainer:732) INFO: 36epoch:train:8001-8100batch: iter_time=1.193e-04, forward_time=0.146, loss_ctc=69.459, loss_att=51.312, acc=0.713, loss=56.756, backward_time=1.028, grad_norm=112.586, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.993e-05, train_time=2.715
+[gpub007:0/64] 2023-07-11 09:12:25,536 (trainer:732) INFO: 36epoch:train:8101-8200batch: iter_time=1.293e-04, forward_time=0.147, loss_ctc=73.031, loss_att=57.418, acc=0.718, loss=62.102, backward_time=1.029, grad_norm=137.616, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.992e-05, train_time=2.723
+[gpub007:0/64] 2023-07-11 09:14:41,301 (trainer:732) INFO: 36epoch:train:8201-8300batch: iter_time=1.259e-04, forward_time=0.147, loss_ctc=74.386, loss_att=51.113, acc=0.723, loss=58.095, backward_time=1.027, grad_norm=132.786, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.991e-05, train_time=2.715
+[gpub007:0/64] 2023-07-11 09:15:40,897 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub007:0/64] 2023-07-11 09:15:59,079 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-11 09:16:02,817 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-11 09:16:02,818 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpub007:0/64] 2023-07-11 09:16:02,824 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-11 09:20:23,879 (trainer:732) INFO: 36epoch:train:8301-8400batch: iter_time=1.948, forward_time=0.148, loss_ctc=77.413, loss_att=53.279, acc=0.714, loss=60.519, backward_time=1.046, grad_norm=114.993, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.990e-05, train_time=6.851
+[gpub007:0/64] 2023-07-11 09:22:40,474 (trainer:732) INFO: 36epoch:train:8401-8500batch: iter_time=1.305e-04, forward_time=0.145, loss_ctc=67.555, loss_att=53.104, acc=0.710, loss=57.439, backward_time=1.029, grad_norm=107.182, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.989e-05, train_time=2.732
+[gpub007:0/64] 2023-07-11 09:24:56,628 (trainer:732) INFO: 36epoch:train:8501-8600batch: iter_time=1.295e-04, forward_time=0.145, loss_ctc=74.579, loss_att=53.348, acc=0.704, loss=59.718, backward_time=1.030, grad_norm=141.022, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.989e-05, train_time=2.723
+[gpub007:0/64] 2023-07-11 09:27:13,261 (trainer:732) INFO: 36epoch:train:8601-8700batch: iter_time=1.146e-04, forward_time=0.147, loss_ctc=76.259, loss_att=58.412, acc=0.705, loss=63.766, backward_time=1.031, grad_norm=118.202, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.988e-05, train_time=2.732
+[gpub007:0/64] 2023-07-11 09:29:29,625 (trainer:732) INFO: 36epoch:train:8701-8800batch: iter_time=1.134e-04, forward_time=0.145, loss_ctc=74.279, loss_att=56.323, acc=0.706, loss=61.710, backward_time=1.032, grad_norm=102.642, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.987e-05, train_time=2.727
+[gpub007:0/64] 2023-07-11 09:31:45,648 (trainer:732) INFO: 36epoch:train:8801-8900batch: iter_time=1.137e-04, forward_time=0.145, loss_ctc=73.739, loss_att=51.773, acc=0.708, loss=58.363, backward_time=1.027, grad_norm=105.295, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.986e-05, train_time=2.720
+[gpub007:0/64] 2023-07-11 09:34:03,701 (trainer:732) INFO: 36epoch:train:8901-9000batch: iter_time=1.083e-04, forward_time=0.145, loss_ctc=75.303, loss_att=59.854, acc=0.698, loss=64.489, backward_time=1.031, grad_norm=127.216, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.985e-05, train_time=2.761
+[gpub007:0/64] 2023-07-11 09:36:22,691 (trainer:732) INFO: 36epoch:train:9001-9100batch: iter_time=1.100e-04, forward_time=0.145, loss_ctc=72.461, loss_att=51.721, acc=0.723, loss=57.943, backward_time=1.032, grad_norm=101.159, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.984e-05, train_time=2.780
+[gpub007:0/64] 2023-07-11 09:37:55,959 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub007:0/64] 2023-07-11 09:38:14,194 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-11 09:38:17,890 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-11 09:38:17,890 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub007:0/64] 2023-07-11 09:38:17,896 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-11 09:42:47,670 (trainer:732) INFO: 36epoch:train:9101-9200batch: iter_time=1.279, forward_time=0.146, loss_ctc=77.700, loss_att=53.951, acc=0.712, loss=61.076, backward_time=1.039, grad_norm=125.494, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.983e-05, train_time=7.699
+[gpub007:0/64] 2023-07-11 09:45:03,895 (trainer:732) INFO: 36epoch:train:9201-9300batch: iter_time=1.215e-04, forward_time=0.146, loss_ctc=67.907, loss_att=47.197, acc=0.724, loss=53.410, backward_time=1.030, grad_norm=103.631, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.983e-05, train_time=2.724
+[gpub007:0/64] 2023-07-11 09:47:21,117 (trainer:732) INFO: 36epoch:train:9301-9400batch: iter_time=1.186e-04, forward_time=0.146, loss_ctc=75.412, loss_att=58.609, acc=0.698, loss=63.650, backward_time=1.030, grad_norm=120.995, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.982e-05, train_time=2.744
+[gpub007:0/64] 2023-07-11 09:49:36,984 (trainer:732) INFO: 36epoch:train:9401-9500batch: iter_time=1.178e-04, forward_time=0.145, loss_ctc=74.342, loss_att=54.732, acc=0.709, loss=60.615, backward_time=1.028, grad_norm=108.000, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.981e-05, train_time=2.717
+[gpub007:0/64] 2023-07-11 09:51:53,225 (trainer:732) INFO: 36epoch:train:9501-9600batch: iter_time=1.300e-04, forward_time=0.146, loss_ctc=74.148, loss_att=54.822, acc=0.706, loss=60.620, backward_time=1.027, grad_norm=115.651, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.980e-05, train_time=2.725
+[gpub007:0/64] 2023-07-11 09:54:09,043 (trainer:732) INFO: 36epoch:train:9601-9700batch: iter_time=1.255e-04, forward_time=0.146, loss_ctc=74.275, loss_att=53.389, acc=0.713, loss=59.655, backward_time=1.026, grad_norm=129.263, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.979e-05, train_time=2.716
+[gpub007:0/64] 2023-07-11 09:56:24,844 (trainer:732) INFO: 36epoch:train:9701-9800batch: iter_time=1.189e-04, forward_time=0.146, loss_ctc=74.105, loss_att=55.939, acc=0.695, loss=61.388, backward_time=1.027, grad_norm=127.869, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.978e-05, train_time=2.716
+[gpub007:0/64] 2023-07-11 09:58:40,793 (trainer:732) INFO: 36epoch:train:9801-9900batch: iter_time=1.277e-04, forward_time=0.147, loss_ctc=71.580, loss_att=55.299, acc=0.723, loss=60.184, backward_time=1.029, grad_norm=134.775, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.977e-05, train_time=2.719
+[gpub007:0/64] 2023-07-11 10:00:56,221 (trainer:732) INFO: 36epoch:train:9901-10000batch: iter_time=1.297e-04, forward_time=0.146, loss_ctc=73.729, loss_att=45.704, acc=0.728, loss=54.112, backward_time=1.025, grad_norm=134.085, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.977e-05, train_time=2.708
+[gpub007:0/64] 2023-07-11 10:13:19,578 (trainer:338) INFO: 36epoch results: [train] iter_time=0.164, forward_time=0.147, loss_ctc=74.624, loss_att=54.788, acc=0.709, loss=60.739, backward_time=1.030, grad_norm=121.600, clip=100.000, loss_scale=5.071e+30, optim_step_time=0.182, optim0_lr0=6.019e-05, train_time=3.275, time=4 hours, 33 minutes and 8.28 seconds, total_count=330000, gpu_max_cached_mem_GB=37.219, [valid] loss_ctc=45.403, cer_ctc=0.261, loss_att=40.942, acc=0.659, cer=0.438, wer=1.000, loss=42.280, time=6 minutes and 16.69 seconds, total_count=33902, gpu_max_cached_mem_GB=37.219, [att_plot] time=5 minutes and 56.42 seconds, total_count=0, gpu_max_cached_mem_GB=37.219
+[gpub007:0/64] 2023-07-11 10:13:35,156 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub007:0/64] 2023-07-11 10:13:35,262 (trainer:272) INFO: 37/50epoch started. Estimated time to finish: 2 days, 19 hours and 36 minutes
+[gpub007:0/64] 2023-07-11 10:13:35,265 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub007:0/64] 2023-07-11 10:13:53,068 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-11 10:13:56,420 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-11 10:13:56,420 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub007:0/64] 2023-07-11 10:13:56,426 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-11 10:18:04,588 (trainer:732) INFO: 37epoch:train:1-100batch: iter_time=1.275, forward_time=0.148, loss_ctc=73.396, loss_att=59.771, acc=0.684, loss=63.858, backward_time=1.041, grad_norm=127.507, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.976e-05, train_time=5.386
+[gpub007:0/64] 2023-07-11 10:20:41,737 (trainer:732) INFO: 37epoch:train:101-200batch: iter_time=4.899e-04, forward_time=0.313, loss_ctc=79.529, loss_att=56.868, acc=0.699, loss=63.667, backward_time=1.051, grad_norm=117.632, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.188, optim0_lr0=5.975e-05, train_time=3.143
+[gpub007:0/64] 2023-07-11 10:22:59,271 (trainer:732) INFO: 37epoch:train:201-300batch: iter_time=1.262e-04, forward_time=0.146, loss_ctc=69.740, loss_att=52.186, acc=0.698, loss=57.452, backward_time=1.026, grad_norm=104.660, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.974e-05, train_time=2.750
+[gpub007:0/64] 2023-07-11 10:25:16,426 (trainer:732) INFO: 37epoch:train:301-400batch: iter_time=1.299e-04, forward_time=0.144, loss_ctc=72.292, loss_att=54.165, acc=0.684, loss=59.603, backward_time=1.025, grad_norm=125.518, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.973e-05, train_time=2.741
+[gpub007:0/64] 2023-07-11 10:27:34,463 (trainer:732) INFO: 37epoch:train:401-500batch: iter_time=1.320e-04, forward_time=0.146, loss_ctc=63.720, loss_att=46.522, acc=0.727, loss=51.681, backward_time=1.027, grad_norm=118.458, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.972e-05, train_time=2.762
+[gpub007:0/64] 2023-07-11 10:29:56,086 (trainer:732) INFO: 37epoch:train:501-600batch: iter_time=1.294e-04, forward_time=0.145, loss_ctc=69.319, loss_att=52.141, acc=0.697, loss=57.294, backward_time=1.056, grad_norm=122.864, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.971e-05, train_time=2.832
+[gpub007:0/64] 2023-07-11 10:32:21,236 (trainer:732) INFO: 37epoch:train:601-700batch: iter_time=1.347e-04, forward_time=0.146, loss_ctc=66.499, loss_att=47.009, acc=0.715, loss=52.856, backward_time=1.042, grad_norm=121.537, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.971e-05, train_time=2.903
+[gpub007:0/64] 2023-07-11 10:34:49,561 (trainer:732) INFO: 37epoch:train:701-800batch: iter_time=1.320e-04, forward_time=0.145, loss_ctc=68.531, loss_att=53.636, acc=0.701, loss=58.104, backward_time=1.042, grad_norm=119.565, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.970e-05, train_time=2.966
+[gpub007:0/64] 2023-07-11 10:35:48,543 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpub007:0/64] 2023-07-11 10:36:05,996 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-11 10:36:09,370 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-11 10:36:09,370 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub007:0/64] 2023-07-11 10:36:09,376 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-11 10:41:12,024 (trainer:732) INFO: 37epoch:train:801-900batch: iter_time=1.591, forward_time=0.146, loss_ctc=68.487, loss_att=51.202, acc=0.698, loss=56.387, backward_time=1.051, grad_norm=122.913, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.969e-05, train_time=7.649
+[gpub007:0/64] 2023-07-11 10:43:29,382 (trainer:732) INFO: 37epoch:train:901-1000batch: iter_time=1.321e-04, forward_time=0.148, loss_ctc=77.861, loss_att=60.035, acc=0.704, loss=65.383, backward_time=1.033, grad_norm=121.738, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.968e-05, train_time=2.747
+[gpub007:0/64] 2023-07-11 10:45:45,646 (trainer:732) INFO: 37epoch:train:1001-1100batch: iter_time=1.477e-04, forward_time=0.147, loss_ctc=70.750, loss_att=50.708, acc=0.712, loss=56.720, backward_time=1.030, grad_norm=104.533, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.967e-05, train_time=2.725
+[gpub007:0/64] 2023-07-11 10:48:01,721 (trainer:732) INFO: 37epoch:train:1101-1200batch: iter_time=1.313e-04, forward_time=0.147, loss_ctc=74.678, loss_att=52.029, acc=0.709, loss=58.824, backward_time=1.029, grad_norm=112.328, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.966e-05, train_time=2.721
+[gpub007:0/64] 2023-07-11 10:50:17,245 (trainer:732) INFO: 37epoch:train:1201-1300batch: iter_time=1.208e-04, forward_time=0.145, loss_ctc=66.879, loss_att=49.112, acc=0.706, loss=54.442, backward_time=1.027, grad_norm=114.777, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.965e-05, train_time=2.710
+[gpub007:0/64] 2023-07-11 10:52:33,064 (trainer:732) INFO: 37epoch:train:1301-1400batch: iter_time=1.189e-04, forward_time=0.146, loss_ctc=64.111, loss_att=46.411, acc=0.732, loss=51.721, backward_time=1.029, grad_norm=109.227, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.965e-05, train_time=2.716
+[gpub007:0/64] 2023-07-11 10:54:49,009 (trainer:732) INFO: 37epoch:train:1401-1500batch: iter_time=1.101e-04, forward_time=0.145, loss_ctc=72.953, loss_att=53.605, acc=0.719, loss=59.410, backward_time=1.029, grad_norm=125.380, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.964e-05, train_time=2.719
+[gpub007:0/64] 2023-07-11 10:57:04,671 (trainer:732) INFO: 37epoch:train:1501-1600batch: iter_time=1.076e-04, forward_time=0.145, loss_ctc=63.215, loss_att=47.727, acc=0.712, loss=52.373, backward_time=1.027, grad_norm=122.014, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.963e-05, train_time=2.713
+[gpub007:0/64] 2023-07-11 10:58:36,988 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub007:0/64] 2023-07-11 10:58:55,473 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-11 10:58:58,896 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-11 10:58:58,896 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub007:0/64] 2023-07-11 10:58:58,902 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-11 11:03:02,868 (trainer:732) INFO: 37epoch:train:1601-1700batch: iter_time=1.300, forward_time=0.147, loss_ctc=68.034, loss_att=50.001, acc=0.717, loss=55.411, backward_time=1.037, grad_norm=108.329, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.962e-05, train_time=7.164
+[gpub007:0/64] 2023-07-11 11:05:19,465 (trainer:732) INFO: 37epoch:train:1701-1800batch: iter_time=1.216e-04, forward_time=0.146, loss_ctc=74.537, loss_att=60.118, acc=0.683, loss=64.444, backward_time=1.030, grad_norm=127.510, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.961e-05, train_time=2.732
+[gpub007:0/64] 2023-07-11 11:07:35,430 (trainer:732) INFO: 37epoch:train:1801-1900batch: iter_time=1.204e-04, forward_time=0.146, loss_ctc=74.236, loss_att=51.703, acc=0.713, loss=58.463, backward_time=1.027, grad_norm=148.643, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.960e-05, train_time=2.719
+[gpub007:0/64] 2023-07-11 11:09:51,405 (trainer:732) INFO: 37epoch:train:1901-2000batch: iter_time=1.242e-04, forward_time=0.146, loss_ctc=69.162, loss_att=54.264, acc=0.695, loss=58.734, backward_time=1.028, grad_norm=109.083, clip=100.000, loss_scale=1.014e+31, optim_step_time=0.182, optim0_lr0=5.960e-05, train_time=2.719
+[gpub007:0/64] 2023-07-11 11:12:07,317 (trainer:732) INFO: 37epoch:train:2001-2100batch: iter_time=1.270e-04, forward_time=0.146, loss_ctc=69.839, loss_att=51.446, acc=0.698, loss=56.964, backward_time=1.029, grad_norm=123.054, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.959e-05, train_time=2.718
+[gpub007:0/64] 2023-07-11 11:14:22,887 (trainer:732) INFO: 37epoch:train:2101-2200batch: iter_time=1.290e-04, forward_time=0.145, loss_ctc=65.376, loss_att=47.470, acc=0.727, loss=52.842, backward_time=1.027, grad_norm=98.798, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.958e-05, train_time=2.711
+[gpub007:0/64] 2023-07-11 11:16:38,522 (trainer:732) INFO: 37epoch:train:2201-2300batch: iter_time=1.231e-04, forward_time=0.146, loss_ctc=67.293, loss_att=50.480, acc=0.704, loss=55.524, backward_time=1.027, grad_norm=120.763, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.957e-05, train_time=2.712
+[gpub007:0/64] 2023-07-11 11:18:54,131 (trainer:732) INFO: 37epoch:train:2301-2400batch: iter_time=1.193e-04, forward_time=0.145, loss_ctc=66.950, loss_att=48.792, acc=0.716, loss=54.240, backward_time=1.026, grad_norm=115.115, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.956e-05, train_time=2.712
+[gpub007:0/64] 2023-07-11 11:21:09,741 (trainer:732) INFO: 37epoch:train:2401-2500batch: iter_time=1.190e-04, forward_time=0.146, loss_ctc=65.267, loss_att=51.255, acc=0.708, loss=55.459, backward_time=1.028, grad_norm=143.003, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.955e-05, train_time=2.712
+[gpub007:0/64] 2023-07-11 11:21:11,290 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub007:0/64] 2023-07-11 11:21:29,387 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-11 11:21:32,829 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-11 11:21:32,829 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub007:0/64] 2023-07-11 11:21:32,835 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-11 11:27:41,279 (trainer:732) INFO: 37epoch:train:2501-2600batch: iter_time=1.271, forward_time=0.147, loss_ctc=74.512, loss_att=56.767, acc=0.713, loss=62.090, backward_time=1.045, grad_norm=114.692, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.954e-05, train_time=7.831
+[gpub007:0/64] 2023-07-11 11:29:57,752 (trainer:732) INFO: 37epoch:train:2601-2700batch: iter_time=1.219e-04, forward_time=0.147, loss_ctc=74.458, loss_att=51.708, acc=0.717, loss=58.533, backward_time=1.029, grad_norm=117.615, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.954e-05, train_time=2.729
+[gpub007:0/64] 2023-07-11 11:32:13,486 (trainer:732) INFO: 37epoch:train:2701-2800batch: iter_time=1.187e-04, forward_time=0.147, loss_ctc=70.284, loss_att=52.794, acc=0.709, loss=58.041, backward_time=1.028, grad_norm=121.598, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.953e-05, train_time=2.714
+[gpub007:0/64] 2023-07-11 11:34:29,076 (trainer:732) INFO: 37epoch:train:2801-2900batch: iter_time=1.256e-04, forward_time=0.146, loss_ctc=73.517, loss_att=52.961, acc=0.704, loss=59.128, backward_time=1.027, grad_norm=124.741, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.952e-05, train_time=2.712
+[gpub007:0/64] 2023-07-11 11:36:44,873 (trainer:732) INFO: 37epoch:train:2901-3000batch: iter_time=1.128e-04, forward_time=0.147, loss_ctc=62.621, loss_att=46.877, acc=0.726, loss=51.600, backward_time=1.028, grad_norm=106.802, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.951e-05, train_time=2.716
+[gpub007:0/64] 2023-07-11 11:39:11,472 (trainer:732) INFO: 37epoch:train:3001-3100batch: iter_time=1.164e-04, forward_time=0.147, loss_ctc=65.661, loss_att=48.617, acc=0.723, loss=53.730, backward_time=1.043, grad_norm=107.600, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.950e-05, train_time=2.932
+[gpub007:0/64] 2023-07-11 11:41:29,942 (trainer:732) INFO: 37epoch:train:3101-3200batch: iter_time=1.193e-04, forward_time=0.171, loss_ctc=66.728, loss_att=47.652, acc=0.723, loss=53.375, backward_time=1.029, grad_norm=108.349, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.183, optim0_lr0=5.949e-05, train_time=2.769
+[gpub007:0/64] 2023-07-11 11:43:51,934 (trainer:732) INFO: 37epoch:train:3201-3300batch: iter_time=1.278e-04, forward_time=0.147, loss_ctc=66.736, loss_att=50.802, acc=0.718, loss=55.582, backward_time=1.032, grad_norm=117.782, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.949e-05, train_time=2.840
+[gpub007:0/64] 2023-07-11 11:44:38,810 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub007:0/64] 2023-07-11 11:44:56,441 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-11 11:44:59,880 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-11 11:44:59,880 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpub007:0/64] 2023-07-11 11:44:59,886 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-11 11:49:53,465 (trainer:732) INFO: 37epoch:train:3301-3400batch: iter_time=1.328, forward_time=0.213, loss_ctc=71.037, loss_att=56.389, acc=0.703, loss=60.783, backward_time=1.047, grad_norm=112.623, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.187, optim0_lr0=5.948e-05, train_time=7.230
+[gpub007:0/64] 2023-07-11 11:52:09,588 (trainer:732) INFO: 37epoch:train:3401-3500batch: iter_time=1.053e-04, forward_time=0.145, loss_ctc=75.136, loss_att=53.435, acc=0.710, loss=59.945, backward_time=1.028, grad_norm=107.148, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.947e-05, train_time=2.722
+[gpub007:0/64] 2023-07-11 11:54:25,665 (trainer:732) INFO: 37epoch:train:3501-3600batch: iter_time=1.056e-04, forward_time=0.145, loss_ctc=70.639, loss_att=52.362, acc=0.706, loss=57.845, backward_time=1.028, grad_norm=126.243, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.946e-05, train_time=2.721
+[gpub007:0/64] 2023-07-11 11:56:41,580 (trainer:732) INFO: 37epoch:train:3601-3700batch: iter_time=1.267e-04, forward_time=0.145, loss_ctc=73.037, loss_att=52.505, acc=0.709, loss=58.665, backward_time=1.027, grad_norm=134.571, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.945e-05, train_time=2.718
+[gpub007:0/64] 2023-07-11 11:58:57,135 (trainer:732) INFO: 37epoch:train:3701-3800batch: iter_time=1.266e-04, forward_time=0.145, loss_ctc=62.880, loss_att=47.302, acc=0.721, loss=51.975, backward_time=1.027, grad_norm=95.988, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.944e-05, train_time=2.711
+[gpub007:0/64] 2023-07-11 12:01:12,873 (trainer:732) INFO: 37epoch:train:3801-3900batch: iter_time=1.325e-04, forward_time=0.145, loss_ctc=67.521, loss_att=49.809, acc=0.723, loss=55.123, backward_time=1.028, grad_norm=130.044, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.944e-05, train_time=2.715
+[gpub007:0/64] 2023-07-11 12:03:28,186 (trainer:732) INFO: 37epoch:train:3901-4000batch: iter_time=1.065e-04, forward_time=0.143, loss_ctc=65.221, loss_att=46.138, acc=0.727, loss=51.863, backward_time=1.023, grad_norm=99.350, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.181, optim0_lr0=5.943e-05, train_time=2.706
+[gpub007:0/64] 2023-07-11 12:05:44,173 (trainer:732) INFO: 37epoch:train:4001-4100batch: iter_time=1.056e-04, forward_time=0.145, loss_ctc=68.017, loss_att=51.912, acc=0.717, loss=56.744, backward_time=1.029, grad_norm=120.104, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.181, optim0_lr0=5.942e-05, train_time=2.720
+[gpub007:0/64] 2023-07-11 12:07:23,657 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub007:0/64] 2023-07-11 12:07:41,458 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-11 12:07:44,857 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-11 12:07:44,858 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub007:0/64] 2023-07-11 12:07:44,864 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-11 12:11:58,637 (trainer:732) INFO: 37epoch:train:4101-4200batch: iter_time=1.304, forward_time=0.146, loss_ctc=66.351, loss_att=47.177, acc=0.724, loss=52.929, backward_time=1.106, grad_norm=98.938, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.941e-05, train_time=7.489
+[gpub007:0/64] 2023-07-11 12:14:15,395 (trainer:732) INFO: 37epoch:train:4201-4300batch: iter_time=1.148e-04, forward_time=0.145, loss_ctc=74.530, loss_att=57.084, acc=0.705, loss=62.318, backward_time=1.030, grad_norm=121.127, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.940e-05, train_time=2.735
+[gpub007:0/64] 2023-07-11 12:16:31,965 (trainer:732) INFO: 37epoch:train:4301-4400batch: iter_time=1.222e-04, forward_time=0.147, loss_ctc=74.338, loss_att=51.001, acc=0.718, loss=58.002, backward_time=1.033, grad_norm=122.361, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.939e-05, train_time=2.731
+[gpub007:0/64] 2023-07-11 12:18:48,042 (trainer:732) INFO: 37epoch:train:4401-4500batch: iter_time=1.125e-04, forward_time=0.145, loss_ctc=68.798, loss_att=52.278, acc=0.710, loss=57.234, backward_time=1.029, grad_norm=130.252, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.938e-05, train_time=2.721
+[gpub007:0/64] 2023-07-11 12:21:03,702 (trainer:732) INFO: 37epoch:train:4501-4600batch: iter_time=1.165e-04, forward_time=0.145, loss_ctc=68.909, loss_att=51.637, acc=0.704, loss=56.819, backward_time=1.027, grad_norm=137.823, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.938e-05, train_time=2.713
+[gpub007:0/64] 2023-07-11 12:23:19,226 (trainer:732) INFO: 37epoch:train:4601-4700batch: iter_time=1.181e-04, forward_time=0.144, loss_ctc=65.441, loss_att=46.367, acc=0.736, loss=52.090, backward_time=1.026, grad_norm=93.332, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.937e-05, train_time=2.710
+[gpub007:0/64] 2023-07-11 12:25:34,979 (trainer:732) INFO: 37epoch:train:4701-4800batch: iter_time=1.198e-04, forward_time=0.146, loss_ctc=67.536, loss_att=49.512, acc=0.722, loss=54.919, backward_time=1.028, grad_norm=104.177, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.936e-05, train_time=2.715
+[gpub007:0/64] 2023-07-11 12:27:50,659 (trainer:732) INFO: 37epoch:train:4801-4900batch: iter_time=1.221e-04, forward_time=0.146, loss_ctc=66.204, loss_att=48.003, acc=0.721, loss=53.464, backward_time=1.028, grad_norm=117.366, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.935e-05, train_time=2.713
+[gpub007:0/64] 2023-07-11 12:30:06,421 (trainer:732) INFO: 37epoch:train:4901-5000batch: iter_time=1.218e-04, forward_time=0.145, loss_ctc=66.198, loss_att=51.812, acc=0.714, loss=56.128, backward_time=1.027, grad_norm=96.135, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.934e-05, train_time=2.715
+[gpub007:0/64] 2023-07-11 12:30:07,967 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub007:0/64] 2023-07-11 12:30:26,550 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-11 12:30:30,016 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-11 12:30:30,016 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpub007:0/64] 2023-07-11 12:30:30,022 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-11 12:37:05,336 (trainer:732) INFO: 37epoch:train:5001-5100batch: iter_time=1.338, forward_time=0.146, loss_ctc=72.943, loss_att=57.395, acc=0.704, loss=62.059, backward_time=1.048, grad_norm=113.638, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.181, optim0_lr0=5.933e-05, train_time=8.378
+[gpub007:0/64] 2023-07-11 12:39:21,678 (trainer:732) INFO: 37epoch:train:5101-5200batch: iter_time=1.062e-04, forward_time=0.145, loss_ctc=72.681, loss_att=52.867, acc=0.715, loss=58.811, backward_time=1.028, grad_norm=128.302, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.181, optim0_lr0=5.933e-05, train_time=2.727
+[gpub007:0/64] 2023-07-11 12:41:48,738 (trainer:732) INFO: 37epoch:train:5201-5300batch: iter_time=1.287e-04, forward_time=0.145, loss_ctc=69.373, loss_att=54.118, acc=0.697, loss=58.695, backward_time=1.087, grad_norm=117.537, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.932e-05, train_time=2.941
+[gpub007:0/64] 2023-07-11 12:44:08,249 (trainer:732) INFO: 37epoch:train:5301-5400batch: iter_time=1.302e-04, forward_time=0.146, loss_ctc=73.062, loss_att=52.696, acc=0.703, loss=58.806, backward_time=1.034, grad_norm=120.355, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.931e-05, train_time=2.790
+[gpub007:0/64] 2023-07-11 12:46:23,958 (trainer:732) INFO: 37epoch:train:5401-5500batch: iter_time=1.420e-04, forward_time=0.145, loss_ctc=62.483, loss_att=47.374, acc=0.719, loss=51.907, backward_time=1.027, grad_norm=98.551, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.930e-05, train_time=2.714
+[gpub007:0/64] 2023-07-11 12:48:39,675 (trainer:732) INFO: 37epoch:train:5501-5600batch: iter_time=1.315e-04, forward_time=0.146, loss_ctc=65.916, loss_att=50.161, acc=0.712, loss=54.887, backward_time=1.027, grad_norm=100.155, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.929e-05, train_time=2.714
+[gpub007:0/64] 2023-07-11 12:50:54,988 (trainer:732) INFO: 37epoch:train:5601-5700batch: iter_time=1.339e-04, forward_time=0.145, loss_ctc=66.600, loss_att=47.484, acc=0.721, loss=53.219, backward_time=1.025, grad_norm=107.539, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.928e-05, train_time=2.706
+[gpub007:0/64] 2023-07-11 12:53:11,049 (trainer:732) INFO: 37epoch:train:5701-5800batch: iter_time=1.475e-04, forward_time=0.146, loss_ctc=66.766, loss_att=49.900, acc=0.718, loss=54.960, backward_time=1.031, grad_norm=110.353, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.928e-05, train_time=2.721
+[gpub007:0/64] 2023-07-11 12:53:57,706 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub007:0/64] 2023-07-11 12:54:15,837 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-11 12:54:19,255 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-11 12:54:19,255 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpub007:0/64] 2023-07-11 12:54:19,261 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-11 12:58:52,511 (trainer:732) INFO: 37epoch:train:5801-5900batch: iter_time=1.304, forward_time=0.147, loss_ctc=69.520, loss_att=53.386, acc=0.699, loss=58.227, backward_time=1.040, grad_norm=113.201, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.927e-05, train_time=6.829
+[gpub007:0/64] 2023-07-11 13:01:09,640 (trainer:732) INFO: 37epoch:train:5901-6000batch: iter_time=1.520e-04, forward_time=0.146, loss_ctc=76.000, loss_att=58.654, acc=0.707, loss=63.858, backward_time=1.032, grad_norm=114.478, clip=100.000, loss_scale=2.028e+31, optim_step_time=0.182, optim0_lr0=5.926e-05, train_time=2.742
+[gpub007:0/64] 2023-07-11 13:03:25,191 (trainer:732) INFO: 37epoch:train:6001-6100batch: iter_time=1.551e-04, forward_time=0.145, loss_ctc=70.812, loss_att=52.626, acc=0.705, loss=58.081, backward_time=1.027, grad_norm=112.754, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.925e-05, train_time=2.711
+[gpub007:0/64] 2023-07-11 13:05:41,220 (trainer:732) INFO: 37epoch:train:6101-6200batch: iter_time=1.743e-04, forward_time=0.147, loss_ctc=72.313, loss_att=50.371, acc=0.709, loss=56.954, backward_time=1.030, grad_norm=117.075, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.924e-05, train_time=2.720
+[gpub007:0/64] 2023-07-11 13:07:57,111 (trainer:732) INFO: 37epoch:train:6201-6300batch: iter_time=1.548e-04, forward_time=0.147, loss_ctc=63.890, loss_att=47.404, acc=0.710, loss=52.350, backward_time=1.030, grad_norm=103.617, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.923e-05, train_time=2.718
+[gpub007:0/64] 2023-07-11 13:10:12,706 (trainer:732) INFO: 37epoch:train:6301-6400batch: iter_time=1.287e-04, forward_time=0.145, loss_ctc=65.401, loss_att=48.200, acc=0.729, loss=53.360, backward_time=1.027, grad_norm=118.256, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.923e-05, train_time=2.712
+[gpub007:0/64] 2023-07-11 13:12:28,722 (trainer:732) INFO: 37epoch:train:6401-6500batch: iter_time=1.029e-04, forward_time=0.145, loss_ctc=69.854, loss_att=53.100, acc=0.710, loss=58.126, backward_time=1.028, grad_norm=117.987, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.181, optim0_lr0=5.922e-05, train_time=2.720
+[gpub007:0/64] 2023-07-11 13:14:44,493 (trainer:732) INFO: 37epoch:train:6501-6600batch: iter_time=1.153e-04, forward_time=0.146, loss_ctc=64.042, loss_att=46.908, acc=0.722, loss=52.048, backward_time=1.027, grad_norm=115.001, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.921e-05, train_time=2.715
+[gpub007:0/64] 2023-07-11 13:16:18,783 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub007:0/64] 2023-07-11 13:16:37,030 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-11 13:16:40,470 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-11 13:16:40,470 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub007:0/64] 2023-07-11 13:16:40,476 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-11 13:21:07,299 (trainer:732) INFO: 37epoch:train:6601-6700batch: iter_time=1.318, forward_time=0.145, loss_ctc=67.741, loss_att=48.355, acc=0.718, loss=54.171, backward_time=1.039, grad_norm=109.415, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.920e-05, train_time=7.656
+[gpub007:0/64] 2023-07-11 13:23:28,242 (trainer:732) INFO: 37epoch:train:6701-6800batch: iter_time=1.225e-04, forward_time=0.147, loss_ctc=72.885, loss_att=57.769, acc=0.697, loss=62.304, backward_time=1.039, grad_norm=111.526, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.919e-05, train_time=2.819
+[gpub007:0/64] 2023-07-11 13:25:56,628 (trainer:732) INFO: 37epoch:train:6801-6900batch: iter_time=2.967e-04, forward_time=0.166, loss_ctc=76.117, loss_att=52.490, acc=0.713, loss=59.578, backward_time=1.050, grad_norm=103.003, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.918e-05, train_time=2.967
+[gpub007:0/64] 2023-07-11 13:28:28,303 (trainer:732) INFO: 37epoch:train:6901-7000batch: iter_time=1.175e-04, forward_time=0.145, loss_ctc=68.149, loss_att=52.988, acc=0.705, loss=57.537, backward_time=1.046, grad_norm=105.892, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.918e-05, train_time=3.033
+[gpub007:0/64] 2023-07-11 13:30:46,658 (trainer:732) INFO: 37epoch:train:7001-7100batch: iter_time=1.170e-04, forward_time=0.149, loss_ctc=68.961, loss_att=52.298, acc=0.701, loss=57.297, backward_time=1.032, grad_norm=110.890, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.917e-05, train_time=2.767
+[gpub007:0/64] 2023-07-11 13:33:03,183 (trainer:732) INFO: 37epoch:train:7101-7200batch: iter_time=1.397e-04, forward_time=0.145, loss_ctc=63.978, loss_att=47.625, acc=0.728, loss=52.531, backward_time=1.028, grad_norm=115.364, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.916e-05, train_time=2.730
+[gpub007:0/64] 2023-07-11 13:35:33,789 (trainer:732) INFO: 37epoch:train:7201-7300batch: iter_time=1.228e-04, forward_time=0.154, loss_ctc=68.812, loss_att=51.038, acc=0.706, loss=56.370, backward_time=1.048, grad_norm=126.657, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.183, optim0_lr0=5.915e-05, train_time=3.012
+[gpub007:0/64] 2023-07-11 13:37:49,663 (trainer:732) INFO: 37epoch:train:7301-7400batch: iter_time=1.305e-04, forward_time=0.146, loss_ctc=64.371, loss_att=46.270, acc=0.726, loss=51.700, backward_time=1.029, grad_norm=103.992, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.914e-05, train_time=2.717
+[gpub007:0/64] 2023-07-11 13:40:12,866 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub007:0/64] 2023-07-11 13:40:30,561 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub007:0/64] 2023-07-11 13:40:34,240 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpub007:0/64] 2023-07-11 13:40:34,241 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub007:0/64] 2023-07-11 13:40:34,247 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub007:0/64] 2023-07-11 13:43:52,044 (trainer:732) INFO: 37epoch:train:7401-7500batch: iter_time=1.627, forward_time=0.172, loss_ctc=64.992, loss_att=50.931, acc=0.714, loss=55.150, backward_time=1.035, grad_norm=108.730, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.183, optim0_lr0=5.914e-05, train_time=7.245
+[gpub007:0/64] 2023-07-11 13:46:11,448 (trainer:732) INFO: 37epoch:train:7501-7600batch: iter_time=1.250e-04, forward_time=0.147, loss_ctc=72.067, loss_att=55.669, acc=0.704, loss=60.588, backward_time=1.041, grad_norm=122.676, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.913e-05, train_time=2.790
+[gpub007:0/64] 2023-07-11 13:48:27,455 (trainer:732) INFO: 37epoch:train:7601-7700batch: iter_time=1.194e-04, forward_time=0.145, loss_ctc=76.923, loss_att=52.967, acc=0.716, loss=60.154, backward_time=1.028, grad_norm=120.730, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.912e-05, train_time=2.720
+[gpub007:0/64] 2023-07-11 13:50:43,068 (trainer:732) INFO: 37epoch:train:7701-7800batch: iter_time=1.251e-04, forward_time=0.146, loss_ctc=68.362, loss_att=52.743, acc=0.709, loss=57.429, backward_time=1.026, grad_norm=101.121, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.911e-05, train_time=2.712
+[gpub007:0/64] 2023-07-11 13:52:58,971 (trainer:732) INFO: 37epoch:train:7801-7900batch: iter_time=1.327e-04, forward_time=0.146, loss_ctc=70.113, loss_att=53.594, acc=0.703, loss=58.550, backward_time=1.029, grad_norm=129.743, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.910e-05, train_time=2.718
+[gpub007:0/64] 2023-07-11 13:55:14,656 (trainer:732) INFO: 37epoch:train:7901-8000batch: iter_time=1.337e-04, forward_time=0.145, loss_ctc=62.823, loss_att=45.049, acc=0.737, loss=50.381, backward_time=1.028, grad_norm=119.916, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.909e-05, train_time=2.713
+[gpub007:0/64] 2023-07-11 13:57:30,308 (trainer:732) INFO: 37epoch:train:8001-8100batch: iter_time=1.168e-04, forward_time=0.145, loss_ctc=70.501, loss_att=52.455, acc=0.713, loss=57.869, backward_time=1.028, grad_norm=98.464, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.909e-05, train_time=2.713
+[gpub007:0/64] 2023-07-11 13:59:46,046 (trainer:732) INFO: 37epoch:train:8101-8200batch: iter_time=1.113e-04, forward_time=0.145, loss_ctc=63.191, loss_att=46.242, acc=0.722, loss=51.327, backward_time=1.028, grad_norm=102.264, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.908e-05, train_time=2.715
+[gpub007:0/64] 2023-07-11 14:02:02,040 (trainer:732) INFO: 37epoch:train:8201-8300batch: iter_time=1.025e-04, forward_time=0.146, loss_ctc=66.031, loss_att=51.173, acc=0.716, loss=55.631, backward_time=1.030, grad_norm=96.635, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.907e-05, train_time=2.720
+[gpub007:0/64] 2023-07-11 14:02:51,500 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub007:0/64] 2023-07-11 14:03:10,181 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 14:03:13,860 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 14:03:13,860 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub007:0/64] 2023-07-11 14:03:13,866 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 14:09:27,748 (trainer:732) INFO: 37epoch:train:8301-8400batch: iter_time=1.822, forward_time=0.199, loss_ctc=71.059, loss_att=55.503, acc=0.696, loss=60.170, backward_time=1.040, grad_norm=121.867, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.185, optim0_lr0=5.906e-05, train_time=8.914 +[gpub007:0/64] 2023-07-11 14:11:45,404 (trainer:732) INFO: 37epoch:train:8401-8500batch: iter_time=1.342e-04, forward_time=0.146, loss_ctc=74.470, loss_att=53.384, acc=0.713, loss=59.710, backward_time=1.030, grad_norm=123.956, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.905e-05, train_time=2.753 +[gpub007:0/64] 2023-07-11 14:14:01,744 (trainer:732) INFO: 37epoch:train:8501-8600batch: iter_time=1.273e-04, forward_time=0.147, loss_ctc=69.650, loss_att=54.100, acc=0.696, loss=58.765, backward_time=1.029, grad_norm=107.083, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.904e-05, train_time=2.727 +[gpub007:0/64] 2023-07-11 14:16:17,478 (trainer:732) INFO: 37epoch:train:8601-8700batch: iter_time=1.268e-04, forward_time=0.146, loss_ctc=71.456, loss_att=51.857, acc=0.706, loss=57.737, backward_time=1.029, grad_norm=111.129, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.904e-05, train_time=2.714 +[gpub007:0/64] 2023-07-11 14:18:33,020 (trainer:732) INFO: 37epoch:train:8701-8800batch: iter_time=1.248e-04, forward_time=0.144, loss_ctc=63.389, loss_att=47.118, acc=0.718, loss=51.999, backward_time=1.027, grad_norm=125.895, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.903e-05, train_time=2.711 +[gpub007:0/64] 2023-07-11 14:20:50,384 (trainer:732) INFO: 37epoch:train:8801-8900batch: iter_time=1.076e-04, forward_time=0.144, loss_ctc=67.778, loss_att=51.668, acc=0.706, loss=56.501, backward_time=1.028, grad_norm=110.082, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.902e-05, train_time=2.747 +[gpub007:0/64] 2023-07-11 14:23:06,644 (trainer:732) INFO: 37epoch:train:8901-9000batch: iter_time=1.167e-04, forward_time=0.144, loss_ctc=63.860, loss_att=45.761, acc=0.724, loss=51.191, backward_time=1.030, grad_norm=102.661, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.901e-05, train_time=2.725 +[gpub007:0/64] 2023-07-11 14:25:22,073 (trainer:732) INFO: 37epoch:train:9001-9100batch: iter_time=1.318e-04, forward_time=0.143, loss_ctc=67.755, loss_att=50.844, acc=0.717, loss=55.917, backward_time=1.025, grad_norm=114.657, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, 
optim0_lr0=5.900e-05, train_time=2.708 +[gpub007:0/64] 2023-07-11 14:27:14,523 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub007:0/64] 2023-07-11 14:27:32,936 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 14:27:36,432 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 14:27:36,432 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub007:0/64] 2023-07-11 14:27:36,438 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 14:31:34,585 (trainer:732) INFO: 37epoch:train:9101-9200batch: iter_time=2.226, forward_time=0.145, loss_ctc=66.487, loss_att=47.631, acc=0.719, loss=53.288, backward_time=1.042, grad_norm=100.918, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.900e-05, train_time=7.450 +[gpub007:0/64] 2023-07-11 14:33:51,375 (trainer:732) INFO: 37epoch:train:9201-9300batch: iter_time=1.135e-04, forward_time=0.147, loss_ctc=72.194, loss_att=55.790, acc=0.708, loss=60.711, backward_time=1.032, grad_norm=113.211, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.899e-05, train_time=2.736 +[gpub007:0/64] 2023-07-11 14:36:08,125 (trainer:732) INFO: 37epoch:train:9301-9400batch: iter_time=1.159e-04, forward_time=0.146, loss_ctc=73.933, loss_att=50.691, acc=0.724, loss=57.663, backward_time=1.030, grad_norm=106.909, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.898e-05, train_time=2.735 +[gpub007:0/64] 2023-07-11 14:38:24,281 (trainer:732) INFO: 37epoch:train:9401-9500batch: iter_time=1.145e-04, forward_time=0.146, loss_ctc=69.115, loss_att=52.228, acc=0.711, loss=57.294, backward_time=1.029, grad_norm=102.198, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.897e-05, train_time=2.723 +[gpub007:0/64] 2023-07-11 14:40:40,366 (trainer:732) INFO: 37epoch:train:9501-9600batch: iter_time=1.267e-04, forward_time=0.145, loss_ctc=69.005, loss_att=51.201, acc=0.709, loss=56.542, backward_time=1.028, grad_norm=110.687, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.896e-05, train_time=2.721 +[gpub007:0/64] 2023-07-11 14:42:56,170 (trainer:732) INFO: 37epoch:train:9601-9700batch: iter_time=1.177e-04, forward_time=0.146, loss_ctc=64.277, loss_att=45.290, acc=0.739, loss=50.986, backward_time=1.028, grad_norm=116.655, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.895e-05, train_time=2.716 +[gpub007:0/64] 2023-07-11 14:45:11,526 (trainer:732) INFO: 37epoch:train:9701-9800batch: iter_time=1.252e-04, forward_time=0.145, loss_ctc=67.331, loss_att=50.745, acc=0.714, loss=55.721, backward_time=1.026, grad_norm=109.035, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.895e-05, train_time=2.707 +[gpub007:0/64] 2023-07-11 14:47:27,391 (trainer:732) INFO: 37epoch:train:9801-9900batch: iter_time=1.027e-04, forward_time=0.145, loss_ctc=65.575, 
loss_att=47.666, acc=0.723, loss=53.039, backward_time=1.028, grad_norm=110.732, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.894e-05, train_time=2.717 +[gpub007:0/64] 2023-07-11 14:49:43,356 (trainer:732) INFO: 37epoch:train:9901-10000batch: iter_time=1.021e-04, forward_time=0.146, loss_ctc=65.215, loss_att=49.947, acc=0.722, loss=54.527, backward_time=1.028, grad_norm=101.913, clip=100.000, loss_scale=4.056e+31, optim_step_time=0.182, optim0_lr0=5.893e-05, train_time=2.719 +[gpub007:0/64] 2023-07-11 15:04:48,088 (trainer:338) INFO: 37epoch results: [train] iter_time=0.177, forward_time=0.149, loss_ctc=68.992, loss_att=51.253, acc=0.712, loss=56.575, backward_time=1.033, grad_norm=114.371, clip=100.000, loss_scale=2.637e+31, optim_step_time=0.182, optim0_lr0=5.934e-05, train_time=3.313, time=4 hours, 36 minutes and 30.99 seconds, total_count=340000, gpu_max_cached_mem_GB=37.219, [valid] loss_ctc=43.668, cer_ctc=0.256, loss_att=38.075, acc=0.679, cer=0.407, wer=0.998, loss=39.753, time=8 minutes and 38.59 seconds, total_count=34914, gpu_max_cached_mem_GB=37.219, [att_plot] time=6 minutes and 3.24 seconds, total_count=0, gpu_max_cached_mem_GB=37.219 +[gpub007:0/64] 2023-07-11 15:05:05,925 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpub007:0/64] 2023-07-11 15:05:06,141 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/30epoch.pth, exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/32epoch.pth +[gpub007:0/64] 2023-07-11 15:05:06,196 (trainer:272) INFO: 38/50epoch started. Estimated time to finish: 2 days, 14 hours and 49 minutes +[gpub007:0/64] 2023-07-11 15:05:06,412 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub007:0/64] 2023-07-11 15:05:26,268 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 15:05:31,014 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 15:05:31,041 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub007:0/64] 2023-07-11 15:05:31,621 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 15:16:30,217 (trainer:732) INFO: 38epoch:train:1-100batch: iter_time=5.379, forward_time=0.192, loss_ctc=70.747, loss_att=53.761, acc=0.692, loss=58.857, backward_time=1.049, grad_norm=119.443, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.188, optim0_lr0=5.892e-05, train_time=13.677 +[gpub007:0/64] 2023-07-11 15:18:45,992 (trainer:732) INFO: 38epoch:train:101-200batch: iter_time=1.250e-04, forward_time=0.145, loss_ctc=73.101, loss_att=53.060, acc=0.700, loss=59.073, backward_time=1.027, grad_norm=122.458, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.891e-05, train_time=2.715 +[gpub007:0/64] 2023-07-11 15:21:03,541 (trainer:732) INFO: 38epoch:train:201-300batch: iter_time=1.207e-04, forward_time=0.144, loss_ctc=76.026, loss_att=55.987, acc=0.709, loss=61.999, backward_time=1.028, grad_norm=112.894, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.183, optim0_lr0=5.890e-05, train_time=2.751 +[gpub007:0/64] 2023-07-11 15:23:19,228 (trainer:732) INFO: 38epoch:train:301-400batch: iter_time=1.281e-04, forward_time=0.145, loss_ctc=71.814, loss_att=55.645, acc=0.698, loss=60.496, backward_time=1.027, grad_norm=131.861, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.890e-05, train_time=2.714 +[gpub007:0/64] 2023-07-11 15:25:36,514 (trainer:732) INFO: 38epoch:train:401-500batch: iter_time=1.300e-04, forward_time=0.145, loss_ctc=70.354, loss_att=49.610, acc=0.704, loss=55.833, backward_time=1.027, grad_norm=118.136, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.889e-05, train_time=2.745 +[gpub007:0/64] 2023-07-11 15:27:51,727 (trainer:732) INFO: 38epoch:train:501-600batch: iter_time=1.200e-04, forward_time=0.143, loss_ctc=73.588, loss_att=50.672, acc=0.689, loss=57.547, backward_time=1.027, grad_norm=114.586, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.888e-05, train_time=2.704 +[gpub007:0/64] 2023-07-11 15:30:07,936 (trainer:732) INFO: 38epoch:train:601-700batch: iter_time=1.170e-04, forward_time=0.146, loss_ctc=72.102, loss_att=54.286, acc=0.690, loss=59.631, backward_time=1.031, grad_norm=134.955, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.887e-05, train_time=2.724 +[gpub007:0/64] 2023-07-11 15:32:43,553 (trainer:732) INFO: 38epoch:train:701-800batch: iter_time=1.331e-04, forward_time=0.146, loss_ctc=81.262, loss_att=66.322, acc=0.697, loss=70.804, backward_time=1.060, grad_norm=123.055, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.886e-05, 
train_time=3.112 +[gpub007:0/64] 2023-07-11 15:33:37,357 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub007:0/64] 2023-07-11 15:33:55,266 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 15:33:58,684 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 15:33:58,684 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub007:0/64] 2023-07-11 15:33:58,690 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 15:38:30,267 (trainer:732) INFO: 38epoch:train:801-900batch: iter_time=1.347, forward_time=0.146, loss_ctc=73.651, loss_att=55.154, acc=0.699, loss=60.703, backward_time=1.051, grad_norm=126.212, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.886e-05, train_time=6.934 +[gpub007:0/64] 2023-07-11 15:40:46,438 (trainer:732) INFO: 38epoch:train:901-1000batch: iter_time=1.174e-04, forward_time=0.144, loss_ctc=70.743, loss_att=51.540, acc=0.697, loss=57.301, backward_time=1.025, grad_norm=125.456, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.885e-05, train_time=2.723 +[gpub007:0/64] 2023-07-11 15:43:01,701 (trainer:732) INFO: 38epoch:train:1001-1100batch: iter_time=1.194e-04, forward_time=0.144, loss_ctc=73.721, loss_att=53.760, acc=0.706, loss=59.748, backward_time=1.024, grad_norm=112.246, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.884e-05, train_time=2.705 +[gpub007:0/64] 2023-07-11 15:45:17,505 (trainer:732) INFO: 38epoch:train:1101-1200batch: iter_time=1.207e-04, forward_time=0.146, loss_ctc=72.435, loss_att=52.704, acc=0.711, loss=58.623, backward_time=1.027, grad_norm=104.981, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.883e-05, train_time=2.716 +[gpub007:0/64] 2023-07-11 15:47:32,904 (trainer:732) INFO: 38epoch:train:1201-1300batch: iter_time=1.165e-04, forward_time=0.145, loss_ctc=75.158, loss_att=54.691, acc=0.700, loss=60.831, backward_time=1.025, grad_norm=122.617, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.882e-05, train_time=2.708 +[gpub007:0/64] 2023-07-11 15:49:48,147 (trainer:732) INFO: 38epoch:train:1301-1400batch: iter_time=1.153e-04, forward_time=0.144, loss_ctc=67.569, loss_att=46.503, acc=0.708, loss=52.823, backward_time=1.025, grad_norm=116.592, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.882e-05, train_time=2.705 +[gpub007:0/64] 2023-07-11 15:52:03,809 (trainer:732) INFO: 38epoch:train:1401-1500batch: iter_time=1.164e-04, forward_time=0.146, loss_ctc=74.796, loss_att=51.540, acc=0.693, loss=58.517, backward_time=1.027, grad_norm=110.585, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.881e-05, train_time=2.713 +[gpub007:0/64] 2023-07-11 15:54:19,335 (trainer:732) INFO: 38epoch:train:1501-1600batch: iter_time=1.158e-04, forward_time=0.145, loss_ctc=71.958, loss_att=61.097, acc=0.690, loss=64.355, 
backward_time=1.025, grad_norm=113.539, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.880e-05, train_time=2.710 +[gpub007:0/64] 2023-07-11 15:55:50,222 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub007:0/64] 2023-07-11 15:56:08,930 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 15:56:12,441 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 15:56:12,441 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub007:0/64] 2023-07-11 15:56:12,447 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 16:01:18,564 (trainer:732) INFO: 38epoch:train:1601-1700batch: iter_time=1.317, forward_time=0.200, loss_ctc=81.834, loss_att=63.994, acc=0.701, loss=69.346, backward_time=1.044, grad_norm=133.607, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.185, optim0_lr0=5.879e-05, train_time=8.384 +[gpub007:0/64] 2023-07-11 16:03:34,797 (trainer:732) INFO: 38epoch:train:1701-1800batch: iter_time=1.470e-04, forward_time=0.148, loss_ctc=67.402, loss_att=49.279, acc=0.700, loss=54.716, backward_time=1.031, grad_norm=110.671, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.878e-05, train_time=2.725 +[gpub007:0/64] 2023-07-11 16:05:50,444 (trainer:732) INFO: 38epoch:train:1801-1900batch: iter_time=9.531e-05, forward_time=0.144, loss_ctc=72.375, loss_att=53.089, acc=0.708, loss=58.875, backward_time=1.027, grad_norm=108.075, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.877e-05, train_time=2.713 +[gpub007:0/64] 2023-07-11 16:08:06,252 (trainer:732) INFO: 38epoch:train:1901-2000batch: iter_time=1.006e-04, forward_time=0.145, loss_ctc=73.375, loss_att=53.204, acc=0.705, loss=59.255, backward_time=1.029, grad_norm=123.638, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.877e-05, train_time=2.716 +[gpub007:0/64] 2023-07-11 16:10:22,103 (trainer:732) INFO: 38epoch:train:2001-2100batch: iter_time=1.034e-04, forward_time=0.145, loss_ctc=73.792, loss_att=53.598, acc=0.707, loss=59.656, backward_time=1.028, grad_norm=113.370, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.876e-05, train_time=2.717 +[gpub007:0/64] 2023-07-11 16:12:37,344 (trainer:732) INFO: 38epoch:train:2101-2200batch: iter_time=1.017e-04, forward_time=0.143, loss_ctc=68.777, loss_att=48.127, acc=0.707, loss=54.322, backward_time=1.024, grad_norm=103.500, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.875e-05, train_time=2.705 +[gpub007:0/64] 2023-07-11 16:14:52,913 (trainer:732) INFO: 38epoch:train:2201-2300batch: iter_time=1.120e-04, forward_time=0.144, loss_ctc=72.635, loss_att=50.736, acc=0.688, loss=57.305, backward_time=1.026, grad_norm=121.079, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.874e-05, train_time=2.711 +[gpub007:0/64] 2023-07-11 16:17:08,900 (trainer:732) INFO: 
38epoch:train:2301-2400batch: iter_time=1.095e-04, forward_time=0.145, loss_ctc=72.630, loss_att=59.796, acc=0.694, loss=63.646, backward_time=1.028, grad_norm=123.940, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.873e-05, train_time=2.720 +[gpub007:0/64] 2023-07-11 16:19:26,445 (trainer:732) INFO: 38epoch:train:2401-2500batch: iter_time=1.080e-04, forward_time=0.144, loss_ctc=78.558, loss_att=59.280, acc=0.703, loss=65.064, backward_time=1.027, grad_norm=152.682, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.873e-05, train_time=2.751 +[gpub007:0/64] 2023-07-11 16:19:43,113 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub007:0/64] 2023-07-11 16:20:01,000 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 16:20:04,423 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 16:20:04,423 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub007:0/64] 2023-07-11 16:20:04,430 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 16:26:52,786 (trainer:732) INFO: 38epoch:train:2501-2600batch: iter_time=3.006, forward_time=0.146, loss_ctc=70.412, loss_att=52.815, acc=0.696, loss=58.094, backward_time=1.045, grad_norm=111.045, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.872e-05, train_time=8.927 +[gpub007:0/64] 2023-07-11 16:29:08,531 (trainer:732) INFO: 38epoch:train:2601-2700batch: iter_time=1.174e-04, forward_time=0.146, loss_ctc=69.552, loss_att=50.822, acc=0.702, loss=56.441, backward_time=1.028, grad_norm=120.794, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.871e-05, train_time=2.715 +[gpub007:0/64] 2023-07-11 16:31:25,764 (trainer:732) INFO: 38epoch:train:2701-2800batch: iter_time=1.252e-04, forward_time=0.146, loss_ctc=73.867, loss_att=53.099, acc=0.716, loss=59.329, backward_time=1.028, grad_norm=175.599, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.870e-05, train_time=2.744 +[gpub007:0/64] 2023-07-11 16:33:41,726 (trainer:732) INFO: 38epoch:train:2801-2900batch: iter_time=1.229e-04, forward_time=0.146, loss_ctc=71.414, loss_att=54.946, acc=0.704, loss=59.886, backward_time=1.029, grad_norm=121.577, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.869e-05, train_time=2.719 +[gpub007:0/64] 2023-07-11 16:35:57,597 (trainer:732) INFO: 38epoch:train:2901-3000batch: iter_time=1.220e-04, forward_time=0.147, loss_ctc=69.942, loss_att=48.570, acc=0.711, loss=54.981, backward_time=1.029, grad_norm=123.931, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.869e-05, train_time=2.717 +[gpub007:0/64] 2023-07-11 16:38:48,643 (trainer:732) INFO: 38epoch:train:3001-3100batch: iter_time=1.234e-04, forward_time=0.145, loss_ctc=69.822, loss_att=48.300, acc=0.699, loss=54.757, backward_time=1.067, grad_norm=119.641, clip=100.000, 
loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.868e-05, train_time=3.421 +[gpub007:0/64] 2023-07-11 16:41:05,225 (trainer:732) INFO: 38epoch:train:3101-3200batch: iter_time=1.319e-04, forward_time=0.144, loss_ctc=72.589, loss_att=54.846, acc=0.691, loss=60.169, backward_time=1.030, grad_norm=114.906, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.867e-05, train_time=2.731 +[gpub007:0/64] 2023-07-11 16:43:21,196 (trainer:732) INFO: 38epoch:train:3201-3300batch: iter_time=1.310e-04, forward_time=0.146, loss_ctc=78.884, loss_att=63.286, acc=0.699, loss=67.966, backward_time=1.029, grad_norm=120.031, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.866e-05, train_time=2.719 +[gpub007:0/64] 2023-07-11 16:44:10,033 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub007:0/64] 2023-07-11 16:44:28,458 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 16:44:31,892 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 16:44:31,892 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub007:0/64] 2023-07-11 16:44:31,899 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 16:48:52,449 (trainer:732) INFO: 38epoch:train:3301-3400batch: iter_time=1.285, forward_time=0.145, loss_ctc=76.662, loss_att=58.886, acc=0.694, loss=64.219, backward_time=1.042, grad_norm=120.851, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.865e-05, train_time=6.625 +[gpub007:0/64] 2023-07-11 16:51:08,696 (trainer:732) INFO: 38epoch:train:3401-3500batch: iter_time=9.670e-05, forward_time=0.144, loss_ctc=68.685, loss_att=48.626, acc=0.711, loss=54.643, backward_time=1.029, grad_norm=120.011, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.864e-05, train_time=2.725 +[gpub007:0/64] 2023-07-11 16:53:24,624 (trainer:732) INFO: 38epoch:train:3501-3600batch: iter_time=9.487e-05, forward_time=0.145, loss_ctc=74.115, loss_att=53.109, acc=0.719, loss=59.411, backward_time=1.029, grad_norm=108.127, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.181, optim0_lr0=5.864e-05, train_time=2.718 +[gpub007:0/64] 2023-07-11 16:55:40,535 (trainer:732) INFO: 38epoch:train:3601-3700batch: iter_time=1.207e-04, forward_time=0.147, loss_ctc=69.858, loss_att=52.811, acc=0.704, loss=57.925, backward_time=1.028, grad_norm=111.325, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.863e-05, train_time=2.718 +[gpub007:0/64] 2023-07-11 16:57:55,982 (trainer:732) INFO: 38epoch:train:3701-3800batch: iter_time=1.540e-04, forward_time=0.146, loss_ctc=70.752, loss_att=49.487, acc=0.709, loss=55.867, backward_time=1.026, grad_norm=119.178, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.862e-05, train_time=2.709 +[gpub007:0/64] 2023-07-11 17:00:11,497 (trainer:732) INFO: 38epoch:train:3801-3900batch: iter_time=1.460e-04, 
forward_time=0.146, loss_ctc=70.453, loss_att=47.431, acc=0.698, loss=54.338, backward_time=1.026, grad_norm=109.875, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.861e-05, train_time=2.710 +[gpub007:0/64] 2023-07-11 17:02:27,401 (trainer:732) INFO: 38epoch:train:3901-4000batch: iter_time=1.432e-04, forward_time=0.146, loss_ctc=70.507, loss_att=53.180, acc=0.696, loss=58.378, backward_time=1.030, grad_norm=146.020, clip=100.000, loss_scale=8.113e+31, optim_step_time=0.182, optim0_lr0=5.860e-05, train_time=2.718 +[gpub007:0/64] 2023-07-11 17:04:43,224 (trainer:732) INFO: 38epoch:train:4001-4100batch: iter_time=1.373e-04, forward_time=0.147, loss_ctc=78.805, loss_att=62.561, acc=0.704, loss=67.435, backward_time=1.029, grad_norm=131.328, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.860e-05, train_time=2.716 +[gpub007:0/64] 2023-07-11 17:06:15,512 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub007:0/64] 2023-07-11 17:06:33,400 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 17:06:36,806 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 17:06:36,806 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub007:0/64] 2023-07-11 17:06:36,812 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 17:10:47,271 (trainer:732) INFO: 38epoch:train:4101-4200batch: iter_time=1.230, forward_time=0.146, loss_ctc=72.417, loss_att=56.184, acc=0.708, loss=61.054, backward_time=1.044, grad_norm=118.253, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.859e-05, train_time=7.281 +[gpub007:0/64] 2023-07-11 17:17:30,106 (trainer:732) INFO: 38epoch:train:4201-4300batch: iter_time=2.531, forward_time=0.213, loss_ctc=68.753, loss_att=50.993, acc=0.709, loss=56.321, backward_time=1.049, grad_norm=121.788, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.184, optim0_lr0=5.858e-05, train_time=8.056 +[gpub007:0/64] 2023-07-11 17:19:49,743 (trainer:732) INFO: 38epoch:train:4301-4400batch: iter_time=1.240e-04, forward_time=0.145, loss_ctc=72.558, loss_att=53.576, acc=0.714, loss=59.270, backward_time=1.032, grad_norm=118.292, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.857e-05, train_time=2.793 +[gpub007:0/64] 2023-07-11 17:22:07,866 (trainer:732) INFO: 38epoch:train:4401-4500batch: iter_time=1.197e-04, forward_time=0.147, loss_ctc=71.112, loss_att=51.616, acc=0.719, loss=57.465, backward_time=1.031, grad_norm=112.490, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.856e-05, train_time=2.762 +[gpub007:0/64] 2023-07-11 17:24:29,383 (trainer:732) INFO: 38epoch:train:4501-4600batch: iter_time=1.195e-04, forward_time=0.147, loss_ctc=71.827, loss_att=53.009, acc=0.715, loss=58.654, backward_time=1.032, grad_norm=123.152, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.856e-05, 
train_time=2.830 +[gpub007:0/64] 2023-07-11 17:32:11,819 (trainer:732) INFO: 38epoch:train:4601-4700batch: iter_time=3.151, forward_time=0.208, loss_ctc=66.866, loss_att=46.398, acc=0.710, loss=52.539, backward_time=1.043, grad_norm=113.267, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.186, optim0_lr0=5.855e-05, train_time=9.248 +[gpub007:0/64] 2023-07-11 17:34:29,405 (trainer:732) INFO: 38epoch:train:4701-4800batch: iter_time=1.367e-04, forward_time=0.146, loss_ctc=73.675, loss_att=50.793, acc=0.699, loss=57.658, backward_time=1.035, grad_norm=118.717, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.854e-05, train_time=2.752 +[gpub007:0/64] 2023-07-11 17:36:45,640 (trainer:732) INFO: 38epoch:train:4801-4900batch: iter_time=1.438e-04, forward_time=0.147, loss_ctc=74.144, loss_att=61.531, acc=0.702, loss=65.315, backward_time=1.031, grad_norm=118.231, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.853e-05, train_time=2.724 +[gpub007:0/64] 2023-07-11 17:39:01,941 (trainer:732) INFO: 38epoch:train:4901-5000batch: iter_time=1.389e-04, forward_time=0.145, loss_ctc=76.127, loss_att=54.705, acc=0.723, loss=61.131, backward_time=1.028, grad_norm=141.808, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.852e-05, train_time=2.726 +[gpub007:0/64] 2023-07-11 17:39:22,673 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub007:0/64] 2023-07-11 17:39:40,627 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 17:39:44,068 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 17:39:44,068 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub007:0/64] 2023-07-11 17:39:44,075 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 17:45:51,254 (trainer:732) INFO: 38epoch:train:5001-5100batch: iter_time=2.554, forward_time=0.146, loss_ctc=69.742, loss_att=52.822, acc=0.708, loss=57.898, backward_time=1.048, grad_norm=120.019, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.852e-05, train_time=8.186 +[gpub007:0/64] 2023-07-11 17:48:07,477 (trainer:732) INFO: 38epoch:train:5101-5200batch: iter_time=1.261e-04, forward_time=0.145, loss_ctc=70.586, loss_att=51.568, acc=0.706, loss=57.273, backward_time=1.029, grad_norm=128.961, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.851e-05, train_time=2.724 +[gpub007:0/64] 2023-07-11 17:50:25,199 (trainer:732) INFO: 38epoch:train:5201-5300batch: iter_time=1.194e-04, forward_time=0.147, loss_ctc=73.336, loss_att=52.926, acc=0.726, loss=59.049, backward_time=1.034, grad_norm=122.483, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.850e-05, train_time=2.754 +[gpub007:0/64] 2023-07-11 17:52:47,787 (trainer:732) INFO: 38epoch:train:5301-5400batch: iter_time=1.186e-04, forward_time=0.146, loss_ctc=70.923, loss_att=53.524, acc=0.712, loss=58.744, 
backward_time=1.038, grad_norm=114.597, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.849e-05, train_time=2.852 +[gpub007:0/64] 2023-07-11 17:55:05,182 (trainer:732) INFO: 38epoch:train:5401-5500batch: iter_time=1.268e-04, forward_time=0.147, loss_ctc=69.702, loss_att=47.066, acc=0.721, loss=53.857, backward_time=1.032, grad_norm=119.223, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.848e-05, train_time=2.748 +[gpub007:0/64] 2023-07-11 17:57:25,026 (trainer:732) INFO: 38epoch:train:5501-5600batch: iter_time=1.298e-04, forward_time=0.145, loss_ctc=68.687, loss_att=49.161, acc=0.704, loss=55.019, backward_time=1.039, grad_norm=124.182, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.848e-05, train_time=2.797 +[gpub007:0/64] 2023-07-11 17:59:40,767 (trainer:732) INFO: 38epoch:train:5601-5700batch: iter_time=1.311e-04, forward_time=0.145, loss_ctc=70.284, loss_att=51.678, acc=0.703, loss=57.260, backward_time=1.025, grad_norm=108.718, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.847e-05, train_time=2.715 +[gpub007:0/64] 2023-07-11 18:01:57,543 (trainer:732) INFO: 38epoch:train:5701-5800batch: iter_time=1.326e-04, forward_time=0.145, loss_ctc=75.984, loss_att=63.009, acc=0.712, loss=66.901, backward_time=1.031, grad_norm=123.887, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.846e-05, train_time=2.735 +[gpub007:0/64] 2023-07-11 18:02:45,064 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub007:0/64] 2023-07-11 18:03:03,156 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 18:03:06,586 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 18:03:06,586 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub007:0/64] 2023-07-11 18:03:06,592 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 18:07:19,305 (trainer:732) INFO: 38epoch:train:5801-5900batch: iter_time=1.235, forward_time=0.147, loss_ctc=73.776, loss_att=54.691, acc=0.708, loss=60.417, backward_time=1.043, grad_norm=136.738, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.845e-05, train_time=6.435 +[gpub007:0/64] 2023-07-11 18:09:36,187 (trainer:732) INFO: 38epoch:train:5901-6000batch: iter_time=1.441e-04, forward_time=0.148, loss_ctc=70.183, loss_att=51.391, acc=0.710, loss=57.029, backward_time=1.031, grad_norm=147.682, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.844e-05, train_time=2.737 +[gpub007:0/64] 2023-07-11 18:11:52,397 (trainer:732) INFO: 38epoch:train:6001-6100batch: iter_time=1.310e-04, forward_time=0.148, loss_ctc=70.819, loss_att=52.613, acc=0.711, loss=58.075, backward_time=1.031, grad_norm=113.462, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.844e-05, train_time=2.724 +[gpub007:0/64] 2023-07-11 18:14:08,779 (trainer:732) INFO: 
38epoch:train:6101-6200batch: iter_time=1.367e-04, forward_time=0.150, loss_ctc=71.513, loss_att=52.325, acc=0.726, loss=58.081, backward_time=1.032, grad_norm=118.225, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.843e-05, train_time=2.727 +[gpub007:0/64] 2023-07-11 18:16:24,587 (trainer:732) INFO: 38epoch:train:6201-6300batch: iter_time=1.327e-04, forward_time=0.147, loss_ctc=73.977, loss_att=53.956, acc=0.712, loss=59.962, backward_time=1.028, grad_norm=121.817, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.842e-05, train_time=2.716 +[gpub007:0/64] 2023-07-11 18:18:40,880 (trainer:732) INFO: 38epoch:train:6301-6400batch: iter_time=1.296e-04, forward_time=0.147, loss_ctc=66.128, loss_att=45.003, acc=0.718, loss=51.341, backward_time=1.027, grad_norm=106.130, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.841e-05, train_time=2.726 +[gpub007:0/64] 2023-07-11 18:20:56,411 (trainer:732) INFO: 38epoch:train:6401-6500batch: iter_time=1.340e-04, forward_time=0.146, loss_ctc=74.344, loss_att=50.406, acc=0.699, loss=57.588, backward_time=1.027, grad_norm=119.146, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.840e-05, train_time=2.710 +[gpub007:0/64] 2023-07-11 18:23:12,519 (trainer:732) INFO: 38epoch:train:6501-6600batch: iter_time=9.948e-05, forward_time=0.146, loss_ctc=70.868, loss_att=60.984, acc=0.701, loss=63.949, backward_time=1.030, grad_norm=116.879, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.840e-05, train_time=2.722 +[gpub007:0/64] 2023-07-11 18:24:45,044 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub007:0/64] 2023-07-11 18:25:02,972 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 18:25:06,379 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 18:25:06,379 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub007:0/64] 2023-07-11 18:25:06,385 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 18:30:09,584 (trainer:732) INFO: 38epoch:train:6601-6700batch: iter_time=1.227, forward_time=0.155, loss_ctc=73.224, loss_att=52.921, acc=0.725, loss=59.012, backward_time=1.047, grad_norm=116.298, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.839e-05, train_time=8.341 +[gpub007:0/64] 2023-07-11 18:32:26,484 (trainer:732) INFO: 38epoch:train:6701-6800batch: iter_time=1.234e-04, forward_time=0.147, loss_ctc=72.226, loss_att=55.596, acc=0.709, loss=60.585, backward_time=1.030, grad_norm=113.274, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.838e-05, train_time=2.738 +[gpub007:0/64] 2023-07-11 18:34:42,989 (trainer:732) INFO: 38epoch:train:6801-6900batch: iter_time=1.331e-04, forward_time=0.148, loss_ctc=66.143, loss_att=48.199, acc=0.713, loss=53.583, backward_time=1.030, grad_norm=119.491, clip=100.000, 
loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.837e-05, train_time=2.730 +[gpub007:0/64] 2023-07-11 18:36:59,131 (trainer:732) INFO: 38epoch:train:6901-7000batch: iter_time=1.259e-04, forward_time=0.148, loss_ctc=77.564, loss_att=57.864, acc=0.718, loss=63.774, backward_time=1.031, grad_norm=160.365, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.836e-05, train_time=2.723 +[gpub007:0/64] 2023-07-11 18:39:17,279 (trainer:732) INFO: 38epoch:train:7001-7100batch: iter_time=1.247e-04, forward_time=0.147, loss_ctc=70.556, loss_att=50.590, acc=0.724, loss=56.580, backward_time=1.037, grad_norm=122.650, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.836e-05, train_time=2.763 +[gpub007:0/64] 2023-07-11 18:41:34,504 (trainer:732) INFO: 38epoch:train:7101-7200batch: iter_time=3.144e-04, forward_time=0.149, loss_ctc=68.346, loss_att=48.698, acc=0.710, loss=54.593, backward_time=1.029, grad_norm=118.638, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.835e-05, train_time=2.744 +[gpub007:0/64] 2023-07-11 18:43:57,994 (trainer:732) INFO: 38epoch:train:7201-7300batch: iter_time=1.238e-04, forward_time=0.155, loss_ctc=66.162, loss_att=45.993, acc=0.706, loss=52.044, backward_time=1.036, grad_norm=106.315, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.183, optim0_lr0=5.834e-05, train_time=2.870 +[gpub007:0/64] 2023-07-11 18:46:14,344 (trainer:732) INFO: 38epoch:train:7301-7400batch: iter_time=1.261e-04, forward_time=0.147, loss_ctc=73.532, loss_att=58.908, acc=0.703, loss=63.295, backward_time=1.032, grad_norm=115.391, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.833e-05, train_time=2.727 +[gpub007:0/64] 2023-07-11 18:48:30,970 (trainer:732) INFO: 38epoch:train:7401-7500batch: iter_time=1.122e-04, forward_time=0.146, loss_ctc=76.415, loss_att=58.132, acc=0.720, loss=63.617, backward_time=1.029, grad_norm=139.268, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.832e-05, train_time=2.732 +[gpub007:0/64] 2023-07-11 18:48:34,386 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub007:0/64] 2023-07-11 18:48:52,397 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 18:48:55,825 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 18:48:55,825 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub007:0/64] 2023-07-11 18:48:55,831 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 18:55:43,722 (trainer:732) INFO: 38epoch:train:7501-7600batch: iter_time=1.273, forward_time=0.146, loss_ctc=68.725, loss_att=54.047, acc=0.697, loss=58.451, backward_time=1.046, grad_norm=128.241, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.832e-05, train_time=8.655 +[gpub007:0/64] 2023-07-11 18:58:00,008 (trainer:732) INFO: 38epoch:train:7601-7700batch: iter_time=1.244e-04, forward_time=0.145, loss_ctc=69.149, loss_att=49.610, acc=0.713, loss=55.472, backward_time=1.028, grad_norm=114.492, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.831e-05, train_time=2.726 +[gpub007:0/64] 2023-07-11 19:00:15,773 (trainer:732) INFO: 38epoch:train:7701-7800batch: iter_time=1.135e-04, forward_time=0.144, loss_ctc=74.497, loss_att=54.442, acc=0.715, loss=60.458, backward_time=1.026, grad_norm=113.087, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.830e-05, train_time=2.715 +[gpub007:0/64] 2023-07-11 19:02:31,653 (trainer:732) INFO: 38epoch:train:7801-7900batch: iter_time=1.162e-04, forward_time=0.146, loss_ctc=71.600, loss_att=54.243, acc=0.707, loss=59.450, backward_time=1.028, grad_norm=109.203, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.829e-05, train_time=2.717 +[gpub007:0/64] 2023-07-11 19:04:47,401 (trainer:732) INFO: 38epoch:train:7901-8000batch: iter_time=1.146e-04, forward_time=0.145, loss_ctc=69.502, loss_att=48.025, acc=0.717, loss=54.468, backward_time=1.027, grad_norm=148.987, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.829e-05, train_time=2.715 +[gpub007:0/64] 2023-07-11 19:07:03,115 (trainer:732) INFO: 38epoch:train:8001-8100batch: iter_time=1.126e-04, forward_time=0.146, loss_ctc=68.040, loss_att=47.776, acc=0.705, loss=53.855, backward_time=1.027, grad_norm=108.136, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.828e-05, train_time=2.714 +[gpub007:0/64] 2023-07-11 19:09:18,954 (trainer:732) INFO: 38epoch:train:8101-8200batch: iter_time=1.398e-04, forward_time=0.145, loss_ctc=69.845, loss_att=51.788, acc=0.700, loss=57.205, backward_time=1.028, grad_norm=106.177, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.827e-05, train_time=2.717 +[gpub007:0/64] 2023-07-11 19:11:34,893 (trainer:732) INFO: 38epoch:train:8201-8300batch: iter_time=1.373e-04, forward_time=0.147, loss_ctc=76.879, loss_att=62.620, acc=0.704, loss=66.898, backward_time=1.029, grad_norm=146.480, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, 
optim0_lr0=5.826e-05, train_time=2.719 +[gpub007:0/64] 2023-07-11 19:12:20,817 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub007:0/64] 2023-07-11 19:12:39,527 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 19:12:43,264 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 19:12:43,264 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub007:0/64] 2023-07-11 19:12:43,270 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 19:17:54,605 (trainer:732) INFO: 38epoch:train:8301-8400batch: iter_time=1.258, forward_time=0.183, loss_ctc=72.466, loss_att=54.292, acc=0.711, loss=59.744, backward_time=1.060, grad_norm=111.111, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.186, optim0_lr0=5.825e-05, train_time=7.594 +[gpub007:0/64] 2023-07-11 19:20:15,267 (trainer:732) INFO: 38epoch:train:8401-8500batch: iter_time=2.415e-04, forward_time=0.154, loss_ctc=66.970, loss_att=49.058, acc=0.715, loss=54.431, backward_time=1.039, grad_norm=124.947, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.825e-05, train_time=2.813 +[gpub007:0/64] 2023-07-11 19:22:32,016 (trainer:732) INFO: 38epoch:train:8501-8600batch: iter_time=1.261e-04, forward_time=0.146, loss_ctc=70.855, loss_att=52.953, acc=0.714, loss=58.324, backward_time=1.028, grad_norm=119.668, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.824e-05, train_time=2.735 +[gpub007:0/64] 2023-07-11 19:24:48,129 (trainer:732) INFO: 38epoch:train:8601-8700batch: iter_time=1.272e-04, forward_time=0.147, loss_ctc=71.921, loss_att=50.753, acc=0.726, loss=57.103, backward_time=1.029, grad_norm=124.913, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.823e-05, train_time=2.722 +[gpub007:0/64] 2023-07-11 19:27:04,015 (trainer:732) INFO: 38epoch:train:8701-8800batch: iter_time=1.213e-04, forward_time=0.145, loss_ctc=72.131, loss_att=56.377, acc=0.711, loss=61.103, backward_time=1.029, grad_norm=166.004, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.822e-05, train_time=2.717 +[gpub007:0/64] 2023-07-11 19:29:19,821 (trainer:732) INFO: 38epoch:train:8801-8900batch: iter_time=1.235e-04, forward_time=0.146, loss_ctc=65.279, loss_att=44.458, acc=0.720, loss=50.705, backward_time=1.029, grad_norm=110.307, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.821e-05, train_time=2.716 +[gpub007:0/64] 2023-07-11 19:31:35,382 (trainer:732) INFO: 38epoch:train:8901-9000batch: iter_time=1.212e-04, forward_time=0.145, loss_ctc=74.734, loss_att=51.599, acc=0.700, loss=58.539, backward_time=1.028, grad_norm=109.171, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.821e-05, train_time=2.711 +[gpub007:0/64] 2023-07-11 19:33:51,354 (trainer:732) INFO: 38epoch:train:9001-9100batch: iter_time=1.150e-04, forward_time=0.144, loss_ctc=72.050, 
loss_att=59.282, acc=0.708, loss=63.112, backward_time=1.030, grad_norm=117.917, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.820e-05, train_time=2.719 +[gpub007:0/64] 2023-07-11 19:35:41,200 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub007:0/64] 2023-07-11 19:35:59,242 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 19:36:02,770 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 19:36:02,770 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub007:0/64] 2023-07-11 19:36:02,776 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 19:40:21,958 (trainer:732) INFO: 38epoch:train:9101-9200batch: iter_time=1.997, forward_time=0.145, loss_ctc=77.765, loss_att=61.390, acc=0.716, loss=66.302, backward_time=1.047, grad_norm=120.032, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.819e-05, train_time=7.812 +[gpub007:0/64] 2023-07-11 19:42:38,550 (trainer:732) INFO: 38epoch:train:9201-9300batch: iter_time=1.323e-04, forward_time=0.146, loss_ctc=67.866, loss_att=49.324, acc=0.710, loss=54.886, backward_time=1.032, grad_norm=107.431, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.818e-05, train_time=2.732 +[gpub007:0/64] 2023-07-11 19:44:54,794 (trainer:732) INFO: 38epoch:train:9301-9400batch: iter_time=1.387e-04, forward_time=0.147, loss_ctc=69.892, loss_att=52.442, acc=0.716, loss=57.677, backward_time=1.029, grad_norm=143.222, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.817e-05, train_time=2.725 +[gpub007:0/64] 2023-07-11 19:47:11,992 (trainer:732) INFO: 38epoch:train:9401-9500batch: iter_time=1.198e-04, forward_time=0.148, loss_ctc=70.450, loss_att=51.486, acc=0.719, loss=57.175, backward_time=1.029, grad_norm=138.038, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.817e-05, train_time=2.744 +[gpub007:0/64] 2023-07-11 19:49:28,386 (trainer:732) INFO: 38epoch:train:9501-9600batch: iter_time=1.269e-04, forward_time=0.147, loss_ctc=72.542, loss_att=52.742, acc=0.717, loss=58.682, backward_time=1.030, grad_norm=121.855, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.816e-05, train_time=2.728 +[gpub007:0/64] 2023-07-11 19:51:44,010 (trainer:732) INFO: 38epoch:train:9601-9700batch: iter_time=1.413e-04, forward_time=0.146, loss_ctc=64.937, loss_att=44.673, acc=0.718, loss=50.752, backward_time=1.027, grad_norm=101.027, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.815e-05, train_time=2.712 +[gpub007:0/64] 2023-07-11 19:53:59,791 (trainer:732) INFO: 38epoch:train:9701-9800batch: iter_time=1.251e-04, forward_time=0.147, loss_ctc=70.702, loss_att=49.664, acc=0.699, loss=55.976, backward_time=1.028, grad_norm=123.006, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.814e-05, train_time=2.715 +[gpub007:0/64] 
2023-07-11 19:56:15,795 (trainer:732) INFO: 38epoch:train:9801-9900batch: iter_time=1.446e-04, forward_time=0.147, loss_ctc=71.465, loss_att=59.772, acc=0.706, loss=63.280, backward_time=1.029, grad_norm=106.173, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.814e-05, train_time=2.720 +[gpub007:0/64] 2023-07-11 19:58:41,336 (trainer:732) INFO: 38epoch:train:9901-10000batch: iter_time=1.231e-04, forward_time=0.147, loss_ctc=74.490, loss_att=53.633, acc=0.724, loss=59.890, backward_time=1.040, grad_norm=135.312, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.813e-05, train_time=2.911 +[gpub007:0/64] 2023-07-11 20:11:56,519 (trainer:338) INFO: 38epoch results: [train] iter_time=0.288, forward_time=0.149, loss_ctc=71.940, loss_att=53.255, acc=0.707, loss=58.860, backward_time=1.032, grad_norm=121.692, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.852e-05, train_time=3.523, time=4 hours, 53 minutes and 53.03 seconds, total_count=350000, gpu_max_cached_mem_GB=37.219, [valid] loss_ctc=43.950, cer_ctc=0.261, loss_att=37.228, acc=0.685, cer=0.385, wer=0.996, loss=39.244, time=7 minutes and 0.7 seconds, total_count=35926, gpu_max_cached_mem_GB=37.219, [att_plot] time=5 minutes and 56.54 seconds, total_count=0, gpu_max_cached_mem_GB=37.219 +[gpub007:0/64] 2023-07-11 20:12:12,342 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub007:0/64] 2023-07-11 20:12:12,352 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/24epoch.pth, exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/33epoch.pth +[gpub007:0/64] 2023-07-11 20:12:12,352 (trainer:272) INFO: 39/50epoch started. Estimated time to finish: 2 days, 10 hours and 25 minutes +[gpub007:0/64] 2023-07-11 20:12:12,356 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
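The per-batch records above report loss_ctc, loss_att, and a combined loss; the logged values are consistent with the usual hybrid CTC/attention interpolation with a CTC weight of 0.3 (for example, 0.3 * 72.466 + 0.7 * 54.292 = 59.744, matching the 38epoch:train:8301-8400batch record). A minimal sketch of that combination, assuming ctc_weight=0.3 as inferred from the logged numbers rather than read from the actual config file:

def combine_losses(loss_ctc: float, loss_att: float, ctc_weight: float = 0.3) -> float:
    # Interpolate the CTC and attention losses into the single "loss" column.
    return ctc_weight * loss_ctc + (1.0 - ctc_weight) * loss_att

# Reproduces the logged value up to rounding: combine_losses(72.466, 54.292) == 59.7442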
+[gpub007:0/64] 2023-07-11 20:12:30,179 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 20:12:33,884 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 20:12:33,884 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub007:0/64] 2023-07-11 20:12:33,890 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 20:17:04,086 (trainer:732) INFO: 39epoch:train:1-100batch: iter_time=1.472, forward_time=0.146, loss_ctc=82.445, loss_att=57.403, acc=0.707, loss=64.915, backward_time=1.051, grad_norm=146.381, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.812e-05, train_time=5.834 +[gpub007:0/64] 2023-07-11 20:19:22,186 (trainer:732) INFO: 39epoch:train:101-200batch: iter_time=6.767e-04, forward_time=0.163, loss_ctc=70.922, loss_att=53.759, acc=0.705, loss=58.908, backward_time=1.028, grad_norm=143.470, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.183, optim0_lr0=5.811e-05, train_time=2.762 +[gpub007:0/64] 2023-07-11 20:21:39,123 (trainer:732) INFO: 39epoch:train:201-300batch: iter_time=1.146e-04, forward_time=0.146, loss_ctc=72.707, loss_att=63.764, acc=0.689, loss=66.447, backward_time=1.029, grad_norm=122.595, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.810e-05, train_time=2.739 +[gpub007:0/64] 2023-07-11 20:24:04,949 (trainer:732) INFO: 39epoch:train:301-400batch: iter_time=1.081e-04, forward_time=0.145, loss_ctc=75.385, loss_att=61.053, acc=0.699, loss=65.353, backward_time=1.043, grad_norm=117.329, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.810e-05, train_time=2.916 +[gpub007:0/64] 2023-07-11 20:26:28,493 (trainer:732) INFO: 39epoch:train:401-500batch: iter_time=1.191e-04, forward_time=0.145, loss_ctc=66.666, loss_att=48.058, acc=0.718, loss=53.640, backward_time=1.036, grad_norm=100.428, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.809e-05, train_time=2.871 +[gpub007:0/64] 2023-07-11 20:28:50,785 (trainer:732) INFO: 39epoch:train:501-600batch: iter_time=1.233e-04, forward_time=0.144, loss_ctc=73.731, loss_att=55.130, acc=0.703, loss=60.710, backward_time=1.033, grad_norm=125.205, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.808e-05, train_time=2.846 +[gpub007:0/64] 2023-07-11 20:31:06,991 (trainer:732) INFO: 39epoch:train:601-700batch: iter_time=1.222e-04, forward_time=0.144, loss_ctc=75.103, loss_att=54.202, acc=0.718, loss=60.472, backward_time=1.028, grad_norm=116.167, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.807e-05, train_time=2.724 +[gpub007:0/64] 2023-07-11 20:33:32,626 (trainer:732) INFO: 39epoch:train:701-800batch: iter_time=1.113e-04, forward_time=0.179, loss_ctc=83.461, loss_att=67.013, acc=0.708, loss=71.947, backward_time=1.038, grad_norm=124.775, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.806e-05, 
train_time=2.912 +[gpub007:0/64] 2023-07-11 20:34:25,347 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub007:0/64] 2023-07-11 20:34:42,967 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 20:34:46,607 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 20:34:46,607 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub007:0/64] 2023-07-11 20:34:46,613 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 20:40:11,035 (trainer:732) INFO: 39epoch:train:801-900batch: iter_time=1.515, forward_time=0.194, loss_ctc=69.526, loss_att=50.779, acc=0.716, loss=56.403, backward_time=1.043, grad_norm=109.498, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.185, optim0_lr0=5.806e-05, train_time=7.968 +[gpub007:0/64] 2023-07-11 20:42:27,546 (trainer:732) INFO: 39epoch:train:901-1000batch: iter_time=1.281e-04, forward_time=0.146, loss_ctc=72.782, loss_att=50.449, acc=0.717, loss=57.149, backward_time=1.029, grad_norm=119.216, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.805e-05, train_time=2.730 +[gpub007:0/64] 2023-07-11 20:44:43,782 (trainer:732) INFO: 39epoch:train:1001-1100batch: iter_time=1.232e-04, forward_time=0.146, loss_ctc=71.584, loss_att=61.262, acc=0.694, loss=64.358, backward_time=1.031, grad_norm=117.670, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.804e-05, train_time=2.724 +[gpub007:0/64] 2023-07-11 20:47:00,214 (trainer:732) INFO: 39epoch:train:1101-1200batch: iter_time=1.113e-04, forward_time=0.146, loss_ctc=78.972, loss_att=65.692, acc=0.686, loss=69.676, backward_time=1.031, grad_norm=138.979, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.803e-05, train_time=2.728 +[gpub007:0/64] 2023-07-11 20:49:16,262 (trainer:732) INFO: 39epoch:train:1201-1300batch: iter_time=1.247e-04, forward_time=0.146, loss_ctc=65.371, loss_att=48.898, acc=0.724, loss=53.840, backward_time=1.028, grad_norm=96.084, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.803e-05, train_time=2.721 +[gpub007:0/64] 2023-07-11 20:51:32,107 (trainer:732) INFO: 39epoch:train:1301-1400batch: iter_time=1.209e-04, forward_time=0.145, loss_ctc=77.244, loss_att=55.458, acc=0.720, loss=61.994, backward_time=1.028, grad_norm=119.068, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.802e-05, train_time=2.717 +[gpub007:0/64] 2023-07-11 20:53:47,887 (trainer:732) INFO: 39epoch:train:1401-1500batch: iter_time=1.205e-04, forward_time=0.145, loss_ctc=68.215, loss_att=49.460, acc=0.716, loss=55.086, backward_time=1.028, grad_norm=110.506, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.801e-05, train_time=2.715 +[gpub007:0/64] 2023-07-11 20:56:03,844 (trainer:732) INFO: 39epoch:train:1501-1600batch: iter_time=1.166e-04, forward_time=0.146, loss_ctc=84.470, loss_att=67.063, acc=0.704, loss=72.285, 
backward_time=1.028, grad_norm=143.565, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.800e-05, train_time=2.719 +[gpub007:0/64] 2023-07-11 20:57:38,968 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub007:0/64] 2023-07-11 20:57:57,200 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 20:58:00,858 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 20:58:00,858 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub007:0/64] 2023-07-11 20:58:00,864 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 21:01:27,700 (trainer:732) INFO: 39epoch:train:1601-1700batch: iter_time=1.388, forward_time=0.145, loss_ctc=65.246, loss_att=47.145, acc=0.726, loss=52.576, backward_time=1.037, grad_norm=105.883, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.799e-05, train_time=6.477 +[gpub007:0/64] 2023-07-11 21:03:43,964 (trainer:732) INFO: 39epoch:train:1701-1800batch: iter_time=1.201e-04, forward_time=0.145, loss_ctc=80.856, loss_att=60.346, acc=0.697, loss=66.499, backward_time=1.029, grad_norm=132.862, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.799e-05, train_time=2.725 +[gpub007:0/64] 2023-07-11 21:05:59,603 (trainer:732) INFO: 39epoch:train:1801-1900batch: iter_time=1.187e-04, forward_time=0.145, loss_ctc=67.475, loss_att=59.764, acc=0.686, loss=62.077, backward_time=1.027, grad_norm=116.056, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.798e-05, train_time=2.713 +[gpub007:0/64] 2023-07-11 21:08:20,006 (trainer:732) INFO: 39epoch:train:1901-2000batch: iter_time=1.238e-04, forward_time=0.146, loss_ctc=79.321, loss_att=69.281, acc=0.669, loss=72.293, backward_time=1.029, grad_norm=130.565, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.797e-05, train_time=2.808 +[gpub007:0/64] 2023-07-11 21:09:11,473 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. 
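The warning above ("The grad norm is nan. Skipping updating the model.") marks an overflow under mixed-precision training: the parameter update is dropped and, per dynamic loss scaling, the loss scale is reduced, which is why the loss_scale column later settles at half its previous value (3.245e+32 down to 1.623e+32). A minimal sketch of this skip-and-rescale pattern using PyTorch's GradScaler; it illustrates the mechanism and is not the exact espnet2 trainer code:

import torch

def training_step(model, batch, optimizer, scaler):
    # scaler is a torch.cuda.amp.GradScaler (default backoff_factor=0.5)
    optimizer.zero_grad()
    with torch.autocast(device_type="cuda"):
        loss = model(**batch)
    scaler.scale(loss).backward()
    scaler.unscale_(optimizer)
    # max_norm=100.0 corresponds to the clip=100.000 column in the records above
    grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=100.0)
    if not torch.isfinite(grad_norm):
        print("The grad norm is nan. Skipping updating the model.")
    scaler.step(optimizer)  # becomes a no-op when the unscaled grads contain inf/nan
    scaler.update()         # halves the loss scale after an overflow
    return grad_norm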
+[gpub007:0/64] 2023-07-11 21:10:39,704 (trainer:732) INFO: 39epoch:train:2001-2100batch: iter_time=1.126e-04, forward_time=0.146, loss_ctc=64.597, loss_att=51.502, acc=0.702, loss=55.430, backward_time=1.031, grad_norm=96.886, clip=100.000, loss_scale=4.437e+32, optim_step_time=0.182, optim0_lr0=5.796e-05, train_time=2.794 +[gpub007:0/64] 2023-07-11 21:12:55,607 (trainer:732) INFO: 39epoch:train:2101-2200batch: iter_time=1.231e-04, forward_time=0.146, loss_ctc=74.772, loss_att=53.675, acc=0.714, loss=60.004, backward_time=1.028, grad_norm=117.734, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.796e-05, train_time=2.718 +[gpub007:0/64] 2023-07-11 21:15:11,288 (trainer:732) INFO: 39epoch:train:2201-2300batch: iter_time=1.137e-04, forward_time=0.145, loss_ctc=67.289, loss_att=51.779, acc=0.697, loss=56.432, backward_time=1.028, grad_norm=106.108, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.795e-05, train_time=2.713 +[gpub007:0/64] 2023-07-11 21:17:26,865 (trainer:732) INFO: 39epoch:train:2301-2400batch: iter_time=1.220e-04, forward_time=0.145, loss_ctc=75.190, loss_att=56.083, acc=0.710, loss=61.815, backward_time=1.026, grad_norm=113.411, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.794e-05, train_time=2.711 +[gpub007:0/64] 2023-07-11 21:19:42,549 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub007:0/64] 2023-07-11 21:20:00,548 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 21:20:04,592 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 21:20:04,592 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub007:0/64] 2023-07-11 21:20:04,598 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 21:24:58,902 (trainer:732) INFO: 39epoch:train:2401-2500batch: iter_time=1.280, forward_time=0.145, loss_ctc=76.397, loss_att=58.799, acc=0.714, loss=64.078, backward_time=1.034, grad_norm=115.315, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.793e-05, train_time=9.041 +[gpub007:0/64] 2023-07-11 21:27:17,544 (trainer:732) INFO: 39epoch:train:2501-2600batch: iter_time=1.457e-04, forward_time=0.148, loss_ctc=81.056, loss_att=59.578, acc=0.701, loss=66.021, backward_time=1.043, grad_norm=142.406, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.792e-05, train_time=2.773 +[gpub007:0/64] 2023-07-11 21:29:33,439 (trainer:732) INFO: 39epoch:train:2601-2700batch: iter_time=1.604e-04, forward_time=0.148, loss_ctc=65.619, loss_att=55.447, acc=0.690, loss=58.498, backward_time=1.029, grad_norm=102.598, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.792e-05, train_time=2.718 +[gpub007:0/64] 2023-07-11 21:31:49,666 (trainer:732) INFO: 39epoch:train:2701-2800batch: iter_time=1.603e-04, forward_time=0.148, loss_ctc=82.212, loss_att=71.106, acc=0.669, loss=74.438, 
backward_time=1.031, grad_norm=129.332, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.791e-05, train_time=2.724 +[gpub007:0/64] 2023-07-11 21:34:05,242 (trainer:732) INFO: 39epoch:train:2801-2900batch: iter_time=1.524e-04, forward_time=0.147, loss_ctc=64.134, loss_att=51.913, acc=0.700, loss=55.579, backward_time=1.027, grad_norm=123.945, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.790e-05, train_time=2.711 +[gpub007:0/64] 2023-07-11 21:36:23,083 (trainer:732) INFO: 39epoch:train:2901-3000batch: iter_time=1.439e-04, forward_time=0.147, loss_ctc=79.298, loss_att=55.578, acc=0.712, loss=62.694, backward_time=1.030, grad_norm=139.929, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.789e-05, train_time=2.757 +[gpub007:0/64] 2023-07-11 21:38:38,798 (trainer:732) INFO: 39epoch:train:3001-3100batch: iter_time=1.589e-04, forward_time=0.147, loss_ctc=63.415, loss_att=48.217, acc=0.695, loss=52.776, backward_time=1.027, grad_norm=103.041, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.789e-05, train_time=2.714 +[gpub007:0/64] 2023-07-11 21:40:56,121 (trainer:732) INFO: 39epoch:train:3101-3200batch: iter_time=1.386e-04, forward_time=0.147, loss_ctc=78.088, loss_att=58.351, acc=0.711, loss=64.272, backward_time=1.029, grad_norm=105.578, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.788e-05, train_time=2.746 +[gpub007:0/64] 2023-07-11 21:41:51,553 (trainer:663) WARNING: The grad norm is nan. Skipping updating the model. +[gpub007:0/64] 2023-07-11 21:43:12,954 (trainer:732) INFO: 39epoch:train:3201-3300batch: iter_time=1.415e-04, forward_time=0.148, loss_ctc=73.884, loss_att=56.857, acc=0.712, loss=61.965, backward_time=1.031, grad_norm=107.966, clip=100.000, loss_scale=2.252e+32, optim_step_time=0.182, optim0_lr0=5.787e-05, train_time=2.736 +[gpub007:0/64] 2023-07-11 21:43:59,089 (multiple_iter_factory:32) INFO: Building 4th iter-factory... 
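Each "Building Nth iter-factory..." record reflects --multiple_iterator true: the training data was pre-sharded into twelve pieces (splits12/.../split.0 through split.11), and the trainer constructs one iterator per shard in a shuffled shard order (split.7, split.3, split.4, split.10, split.11, ... within this epoch), so only one shard's batches need to be materialized at a time. A rough sketch of the idea with hypothetical names, not the espnet2 implementation:

import random
from typing import Callable, Iterable, Iterator

def multiple_iter_factory(build_shard_iter: Callable[[int], Iterable],
                          num_splits: int = 12, seed: int = 0) -> Iterator:
    order = list(range(num_splits))
    random.Random(seed).shuffle(order)  # shard order varies from epoch to epoch
    for i, split in enumerate(order):
        print(f"Building {i}th iter-factory...")
        yield from build_shard_iter(split)  # batches drawn from splits12/*/split.{split}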
+[gpub007:0/64] 2023-07-11 21:44:17,234 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 21:44:20,710 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 21:44:20,710 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub007:0/64] 2023-07-11 21:44:20,717 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 21:50:15,040 (trainer:732) INFO: 39epoch:train:3301-3400batch: iter_time=1.282, forward_time=0.160, loss_ctc=80.950, loss_att=59.708, acc=0.700, loss=66.081, backward_time=1.043, grad_norm=114.918, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.786e-05, train_time=8.441 +[gpub007:0/64] 2023-07-11 21:52:36,391 (trainer:732) INFO: 39epoch:train:3401-3500batch: iter_time=1.173e-04, forward_time=0.145, loss_ctc=68.881, loss_att=53.019, acc=0.709, loss=57.778, backward_time=1.036, grad_norm=115.394, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.785e-05, train_time=2.827 +[gpub007:0/64] 2023-07-11 21:54:53,232 (trainer:732) INFO: 39epoch:train:3501-3600batch: iter_time=1.270e-04, forward_time=0.145, loss_ctc=70.559, loss_att=61.970, acc=0.696, loss=64.546, backward_time=1.031, grad_norm=135.658, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.785e-05, train_time=2.737 +[gpub007:0/64] 2023-07-11 21:57:09,569 (trainer:732) INFO: 39epoch:train:3601-3700batch: iter_time=1.253e-04, forward_time=0.146, loss_ctc=75.565, loss_att=60.106, acc=0.703, loss=64.744, backward_time=1.031, grad_norm=135.792, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.784e-05, train_time=2.727 +[gpub007:0/64] 2023-07-11 21:59:25,333 (trainer:732) INFO: 39epoch:train:3701-3800batch: iter_time=1.349e-04, forward_time=0.145, loss_ctc=63.881, loss_att=45.538, acc=0.730, loss=51.041, backward_time=1.027, grad_norm=119.573, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.783e-05, train_time=2.715 +[gpub007:0/64] 2023-07-11 22:01:42,075 (trainer:732) INFO: 39epoch:train:3801-3900batch: iter_time=1.319e-04, forward_time=0.146, loss_ctc=71.441, loss_att=54.331, acc=0.711, loss=59.464, backward_time=1.028, grad_norm=140.062, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.782e-05, train_time=2.735 +[gpub007:0/64] 2023-07-11 22:03:57,692 (trainer:732) INFO: 39epoch:train:3901-4000batch: iter_time=1.283e-04, forward_time=0.145, loss_ctc=73.747, loss_att=52.312, acc=0.723, loss=58.742, backward_time=1.027, grad_norm=108.179, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.782e-05, train_time=2.712 +[gpub007:0/64] 2023-07-11 22:06:16,663 (trainer:732) INFO: 39epoch:train:4001-4100batch: iter_time=1.254e-04, forward_time=0.146, loss_ctc=82.516, loss_att=66.612, acc=0.709, loss=71.383, backward_time=1.033, grad_norm=125.623, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, 
optim0_lr0=5.781e-05, train_time=2.779 +[gpub007:0/64] 2023-07-11 22:07:54,576 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub007:0/64] 2023-07-11 22:08:12,612 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 22:08:16,068 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 22:08:16,068 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub007:0/64] 2023-07-11 22:08:16,074 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 22:12:33,176 (trainer:732) INFO: 39epoch:train:4101-4200batch: iter_time=1.323, forward_time=0.145, loss_ctc=64.505, loss_att=45.467, acc=0.726, loss=51.178, backward_time=1.043, grad_norm=98.405, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.780e-05, train_time=7.530 +[gpub007:0/64] 2023-07-11 22:14:50,309 (trainer:732) INFO: 39epoch:train:4201-4300batch: iter_time=1.246e-04, forward_time=0.145, loss_ctc=80.061, loss_att=59.836, acc=0.698, loss=65.903, backward_time=1.031, grad_norm=112.396, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.779e-05, train_time=2.742 +[gpub007:0/64] 2023-07-11 22:17:06,891 (trainer:732) INFO: 39epoch:train:4301-4400batch: iter_time=1.234e-04, forward_time=0.145, loss_ctc=66.777, loss_att=55.327, acc=0.692, loss=58.762, backward_time=1.033, grad_norm=124.735, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.779e-05, train_time=2.731 +[gpub007:0/64] 2023-07-11 22:19:26,276 (trainer:732) INFO: 39epoch:train:4401-4500batch: iter_time=1.416e-04, forward_time=0.146, loss_ctc=72.687, loss_att=67.506, acc=0.678, loss=69.061, backward_time=1.031, grad_norm=111.537, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.778e-05, train_time=2.787 +[gpub007:0/64] 2023-07-11 22:21:42,125 (trainer:732) INFO: 39epoch:train:4501-4600batch: iter_time=1.443e-04, forward_time=0.145, loss_ctc=71.028, loss_att=53.255, acc=0.705, loss=58.587, backward_time=1.027, grad_norm=118.819, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.777e-05, train_time=2.717 +[gpub007:0/64] 2023-07-11 22:23:57,568 (trainer:732) INFO: 39epoch:train:4601-4700batch: iter_time=1.358e-04, forward_time=0.144, loss_ctc=74.469, loss_att=52.505, acc=0.718, loss=59.094, backward_time=1.025, grad_norm=106.386, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.776e-05, train_time=2.709 +[gpub007:0/64] 2023-07-11 22:26:17,925 (trainer:732) INFO: 39epoch:train:4701-4800batch: iter_time=1.489e-04, forward_time=0.145, loss_ctc=67.380, loss_att=50.509, acc=0.698, loss=55.570, backward_time=1.032, grad_norm=118.811, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.775e-05, train_time=2.807 +[gpub007:0/64] 2023-07-11 22:28:33,973 (trainer:732) INFO: 39epoch:train:4801-4900batch: iter_time=1.461e-04, forward_time=0.145, loss_ctc=76.569, loss_att=58.151, 
acc=0.712, loss=63.676, backward_time=1.028, grad_norm=109.727, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.775e-05, train_time=2.721 +[gpub007:0/64] 2023-07-11 22:30:49,870 (trainer:732) INFO: 39epoch:train:4901-5000batch: iter_time=1.377e-04, forward_time=0.145, loss_ctc=75.369, loss_att=58.528, acc=0.713, loss=63.580, backward_time=1.029, grad_norm=111.846, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.774e-05, train_time=2.718 +[gpub007:0/64] 2023-07-11 22:30:51,224 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub007:0/64] 2023-07-11 22:31:09,707 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 22:31:13,167 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 22:31:13,167 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub007:0/64] 2023-07-11 22:31:13,173 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 22:38:08,191 (trainer:732) INFO: 39epoch:train:5001-5100batch: iter_time=1.344, forward_time=0.145, loss_ctc=82.216, loss_att=59.417, acc=0.703, loss=66.256, backward_time=1.042, grad_norm=162.555, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.773e-05, train_time=8.766 +[gpub007:0/64] 2023-07-11 22:40:24,746 (trainer:732) INFO: 39epoch:train:5101-5200batch: iter_time=1.092e-04, forward_time=0.145, loss_ctc=65.477, loss_att=56.303, acc=0.692, loss=59.055, backward_time=1.027, grad_norm=106.277, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.772e-05, train_time=2.731 +[gpub007:0/64] 2023-07-11 22:42:41,079 (trainer:732) INFO: 39epoch:train:5201-5300batch: iter_time=1.138e-04, forward_time=0.145, loss_ctc=77.772, loss_att=69.323, acc=0.670, loss=71.858, backward_time=1.031, grad_norm=135.923, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.772e-05, train_time=2.726 +[gpub007:0/64] 2023-07-11 22:44:56,788 (trainer:732) INFO: 39epoch:train:5301-5400batch: iter_time=1.192e-04, forward_time=0.146, loss_ctc=62.953, loss_att=48.841, acc=0.710, loss=53.075, backward_time=1.027, grad_norm=103.666, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.771e-05, train_time=2.714 +[gpub007:0/64] 2023-07-11 22:47:12,771 (trainer:732) INFO: 39epoch:train:5401-5500batch: iter_time=1.177e-04, forward_time=0.145, loss_ctc=77.382, loss_att=55.306, acc=0.718, loss=61.929, backward_time=1.031, grad_norm=114.521, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.770e-05, train_time=2.719 +[gpub007:0/64] 2023-07-11 22:49:42,770 (trainer:732) INFO: 39epoch:train:5501-5600batch: iter_time=1.228e-04, forward_time=0.145, loss_ctc=64.633, loss_att=48.744, acc=0.696, loss=53.511, backward_time=1.054, grad_norm=113.882, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.769e-05, train_time=3.000 +[gpub007:0/64] 2023-07-11 22:51:58,793 
(trainer:732) INFO: 39epoch:train:5601-5700batch: iter_time=1.176e-04, forward_time=0.145, loss_ctc=77.302, loss_att=57.512, acc=0.712, loss=63.449, backward_time=1.029, grad_norm=128.634, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.769e-05, train_time=2.720 +[gpub007:0/64] 2023-07-11 22:54:16,192 (trainer:732) INFO: 39epoch:train:5701-5800batch: iter_time=1.324e-04, forward_time=0.147, loss_ctc=73.793, loss_att=55.864, acc=0.721, loss=61.243, backward_time=1.030, grad_norm=114.852, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.768e-05, train_time=2.748 +[gpub007:0/64] 2023-07-11 22:55:04,165 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub007:0/64] 2023-07-11 22:55:22,024 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 22:55:25,496 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 22:55:25,496 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub007:0/64] 2023-07-11 22:55:25,503 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 23:00:17,174 (trainer:732) INFO: 39epoch:train:5801-5900batch: iter_time=1.300, forward_time=0.148, loss_ctc=79.487, loss_att=58.835, acc=0.707, loss=65.030, backward_time=1.046, grad_norm=153.347, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.767e-05, train_time=7.219 +[gpub007:0/64] 2023-07-11 23:02:33,705 (trainer:732) INFO: 39epoch:train:5901-6000batch: iter_time=1.184e-04, forward_time=0.147, loss_ctc=68.365, loss_att=51.824, acc=0.712, loss=56.787, backward_time=1.030, grad_norm=106.301, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.766e-05, train_time=2.730 +[gpub007:0/64] 2023-07-11 23:04:49,940 (trainer:732) INFO: 39epoch:train:6001-6100batch: iter_time=1.225e-04, forward_time=0.146, loss_ctc=70.279, loss_att=63.284, acc=0.692, loss=65.382, backward_time=1.028, grad_norm=113.976, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.765e-05, train_time=2.724 +[gpub007:0/64] 2023-07-11 23:07:06,067 (trainer:732) INFO: 39epoch:train:6101-6200batch: iter_time=1.352e-04, forward_time=0.147, loss_ctc=74.481, loss_att=58.346, acc=0.709, loss=63.186, backward_time=1.030, grad_norm=110.168, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.765e-05, train_time=2.722 +[gpub007:0/64] 2023-07-11 23:09:21,857 (trainer:732) INFO: 39epoch:train:6201-6300batch: iter_time=1.222e-04, forward_time=0.147, loss_ctc=64.434, loss_att=46.023, acc=0.729, loss=51.546, backward_time=1.027, grad_norm=104.476, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.764e-05, train_time=2.716 +[gpub007:0/64] 2023-07-11 23:11:37,542 (trainer:732) INFO: 39epoch:train:6301-6400batch: iter_time=9.883e-05, forward_time=0.146, loss_ctc=73.731, loss_att=55.854, acc=0.705, loss=61.217, backward_time=1.029, grad_norm=128.634, clip=100.000, 
loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.763e-05, train_time=2.713 +[gpub007:0/64] 2023-07-11 23:13:53,110 (trainer:732) INFO: 39epoch:train:6401-6500batch: iter_time=1.014e-04, forward_time=0.145, loss_ctc=73.122, loss_att=52.799, acc=0.722, loss=58.896, backward_time=1.027, grad_norm=116.155, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.762e-05, train_time=2.711 +[gpub007:0/64] 2023-07-11 23:16:09,488 (trainer:732) INFO: 39epoch:train:6501-6600batch: iter_time=1.200e-04, forward_time=0.148, loss_ctc=80.677, loss_att=64.731, acc=0.717, loss=69.514, backward_time=1.032, grad_norm=124.275, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.762e-05, train_time=2.727 +[gpub007:0/64] 2023-07-11 23:17:43,563 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub007:0/64] 2023-07-11 23:18:01,565 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 23:18:04,990 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 23:18:04,991 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub007:0/64] 2023-07-11 23:18:04,997 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 23:23:07,054 (trainer:732) INFO: 39epoch:train:6601-6700batch: iter_time=1.280, forward_time=0.148, loss_ctc=62.708, loss_att=44.454, acc=0.731, loss=49.930, backward_time=1.044, grad_norm=108.320, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.761e-05, train_time=8.351 +[gpub007:0/64] 2023-07-11 23:25:23,988 (trainer:732) INFO: 39epoch:train:6701-6800batch: iter_time=1.248e-04, forward_time=0.147, loss_ctc=76.781, loss_att=56.793, acc=0.708, loss=62.789, backward_time=1.033, grad_norm=122.413, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.760e-05, train_time=2.738 +[gpub007:0/64] 2023-07-11 23:27:39,830 (trainer:732) INFO: 39epoch:train:6801-6900batch: iter_time=1.359e-04, forward_time=0.145, loss_ctc=67.450, loss_att=56.179, acc=0.690, loss=59.560, backward_time=1.029, grad_norm=122.769, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.759e-05, train_time=2.717 +[gpub007:0/64] 2023-07-11 23:29:55,762 (trainer:732) INFO: 39epoch:train:6901-7000batch: iter_time=1.277e-04, forward_time=0.145, loss_ctc=74.007, loss_att=67.955, acc=0.676, loss=69.771, backward_time=1.028, grad_norm=110.064, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.181, optim0_lr0=5.759e-05, train_time=2.718 +[gpub007:0/64] 2023-07-11 23:32:11,662 (trainer:732) INFO: 39epoch:train:7001-7100batch: iter_time=1.410e-04, forward_time=0.146, loss_ctc=69.492, loss_att=52.768, acc=0.706, loss=57.785, backward_time=1.028, grad_norm=124.454, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.758e-05, train_time=2.718 +[gpub007:0/64] 2023-07-11 23:34:27,575 (trainer:732) INFO: 39epoch:train:7101-7200batch: iter_time=1.266e-04, 
forward_time=0.145, loss_ctc=74.585, loss_att=52.634, acc=0.720, loss=59.220, backward_time=1.025, grad_norm=118.461, clip=100.000, loss_scale=1.623e+32, optim_step_time=0.182, optim0_lr0=5.757e-05, train_time=2.718 +[gpub007:0/64] 2023-07-11 23:36:48,766 (trainer:732) INFO: 39epoch:train:7201-7300batch: iter_time=1.233e-04, forward_time=0.145, loss_ctc=66.711, loss_att=48.903, acc=0.700, loss=54.246, backward_time=1.039, grad_norm=116.360, clip=100.000, loss_scale=2.596e+32, optim_step_time=0.181, optim0_lr0=5.756e-05, train_time=2.824 +[gpub007:0/64] 2023-07-11 23:39:05,143 (trainer:732) INFO: 39epoch:train:7301-7400batch: iter_time=1.284e-04, forward_time=0.145, loss_ctc=75.458, loss_att=57.210, acc=0.713, loss=62.684, backward_time=1.027, grad_norm=114.220, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.756e-05, train_time=2.727 +[gpub007:0/64] 2023-07-11 23:41:21,094 (trainer:732) INFO: 39epoch:train:7401-7500batch: iter_time=1.180e-04, forward_time=0.146, loss_ctc=74.979, loss_att=59.217, acc=0.713, loss=63.946, backward_time=1.027, grad_norm=115.240, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.755e-05, train_time=2.719 +[gpub007:0/64] 2023-07-11 23:41:23,694 (multiple_iter_factory:32) INFO: Building 9th iter-factory... +[gpub007:0/64] 2023-07-11 23:41:41,916 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-11 23:41:45,376 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-11 23:41:45,377 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub007:0/64] 2023-07-11 23:41:45,383 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-11 23:46:42,941 (trainer:732) INFO: 39epoch:train:7501-7600batch: iter_time=1.296, forward_time=0.145, loss_ctc=77.503, loss_att=55.837, acc=0.711, loss=62.337, backward_time=1.051, grad_norm=145.896, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.754e-05, train_time=6.437 +[gpub007:0/64] 2023-07-11 23:48:59,674 (trainer:732) INFO: 39epoch:train:7601-7700batch: iter_time=1.041e-04, forward_time=0.144, loss_ctc=67.198, loss_att=51.955, acc=0.705, loss=56.528, backward_time=1.028, grad_norm=113.429, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.753e-05, train_time=2.734 +[gpub007:0/64] 2023-07-11 23:51:15,617 (trainer:732) INFO: 39epoch:train:7701-7800batch: iter_time=1.090e-04, forward_time=0.144, loss_ctc=69.917, loss_att=61.177, acc=0.693, loss=63.799, backward_time=1.028, grad_norm=115.644, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.752e-05, train_time=2.719 +[gpub007:0/64] 2023-07-11 23:53:31,519 (trainer:732) INFO: 39epoch:train:7801-7900batch: iter_time=1.015e-04, forward_time=0.145, loss_ctc=73.589, loss_att=58.090, acc=0.692, loss=62.739, backward_time=1.028, grad_norm=133.587, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.752e-05, 
train_time=2.718 +[gpub007:0/64] 2023-07-11 23:55:46,933 (trainer:732) INFO: 39epoch:train:7901-8000batch: iter_time=1.143e-04, forward_time=0.143, loss_ctc=66.739, loss_att=48.185, acc=0.715, loss=53.751, backward_time=1.025, grad_norm=104.374, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.751e-05, train_time=2.708 +[gpub007:0/64] 2023-07-11 23:58:02,634 (trainer:732) INFO: 39epoch:train:8001-8100batch: iter_time=1.041e-04, forward_time=0.144, loss_ctc=72.965, loss_att=54.303, acc=0.699, loss=59.902, backward_time=1.027, grad_norm=127.034, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.750e-05, train_time=2.714 +[gpub007:0/64] 2023-07-12 00:00:18,410 (trainer:732) INFO: 39epoch:train:8101-8200batch: iter_time=1.150e-04, forward_time=0.144, loss_ctc=74.311, loss_att=54.365, acc=0.710, loss=60.348, backward_time=1.028, grad_norm=116.272, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.749e-05, train_time=2.715 +[gpub007:0/64] 2023-07-12 00:02:34,457 (trainer:732) INFO: 39epoch:train:8201-8300batch: iter_time=1.422e-04, forward_time=0.147, loss_ctc=83.063, loss_att=65.404, acc=0.711, loss=70.701, backward_time=1.028, grad_norm=116.915, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.749e-05, train_time=2.721 +[gpub007:0/64] 2023-07-12 00:03:21,916 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub007:0/64] 2023-07-12 00:03:39,866 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-12 00:03:43,637 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-12 00:03:43,637 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub007:0/64] 2023-07-12 00:03:43,643 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-12 00:09:32,672 (trainer:732) INFO: 39epoch:train:8301-8400batch: iter_time=1.294, forward_time=0.173, loss_ctc=67.213, loss_att=48.682, acc=0.723, loss=54.241, backward_time=1.041, grad_norm=99.075, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.748e-05, train_time=8.364 +[gpub007:0/64] 2023-07-12 00:11:52,214 (trainer:732) INFO: 39epoch:train:8401-8500batch: iter_time=1.272e-04, forward_time=0.145, loss_ctc=70.258, loss_att=50.030, acc=0.724, loss=56.098, backward_time=1.027, grad_norm=127.922, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.747e-05, train_time=2.791 +[gpub007:0/64] 2023-07-12 00:14:09,395 (trainer:732) INFO: 39epoch:train:8501-8600batch: iter_time=1.290e-04, forward_time=0.146, loss_ctc=70.442, loss_att=61.909, acc=0.697, loss=64.469, backward_time=1.029, grad_norm=112.268, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.746e-05, train_time=2.743 +[gpub007:0/64] 2023-07-12 00:16:25,778 (trainer:732) INFO: 39epoch:train:8601-8700batch: iter_time=1.262e-04, forward_time=0.148, loss_ctc=74.135, loss_att=62.987, acc=0.695, 
loss=66.332, backward_time=1.032, grad_norm=125.859, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.746e-05, train_time=2.727 +[gpub007:0/64] 2023-07-12 00:18:41,463 (trainer:732) INFO: 39epoch:train:8701-8800batch: iter_time=1.386e-04, forward_time=0.146, loss_ctc=63.375, loss_att=49.055, acc=0.724, loss=53.351, backward_time=1.027, grad_norm=99.015, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.745e-05, train_time=2.713 +[gpub007:0/64] 2023-07-12 00:20:57,425 (trainer:732) INFO: 39epoch:train:8801-8900batch: iter_time=1.280e-04, forward_time=0.146, loss_ctc=78.425, loss_att=54.594, acc=0.729, loss=61.744, backward_time=1.029, grad_norm=136.659, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.744e-05, train_time=2.719 +[gpub007:0/64] 2023-07-12 00:23:13,295 (trainer:732) INFO: 39epoch:train:8901-9000batch: iter_time=1.286e-04, forward_time=0.146, loss_ctc=67.255, loss_att=49.057, acc=0.716, loss=54.517, backward_time=1.027, grad_norm=100.071, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.181, optim0_lr0=5.743e-05, train_time=2.717 +[gpub007:0/64] 2023-07-12 00:25:29,675 (trainer:732) INFO: 39epoch:train:9001-9100batch: iter_time=1.298e-04, forward_time=0.146, loss_ctc=83.715, loss_att=66.621, acc=0.708, loss=71.749, backward_time=1.031, grad_norm=134.965, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.743e-05, train_time=2.727 +[gpub007:0/64] 2023-07-12 00:27:06,738 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub007:0/64] 2023-07-12 00:27:24,585 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-12 00:27:28,341 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-12 00:27:28,341 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub007:0/64] 2023-07-12 00:27:28,347 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-12 00:32:56,782 (trainer:732) INFO: 39epoch:train:9101-9200batch: iter_time=1.495, forward_time=0.146, loss_ctc=64.232, loss_att=44.875, acc=0.735, loss=50.682, backward_time=1.035, grad_norm=100.758, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.742e-05, train_time=8.942 +[gpub007:0/64] 2023-07-12 00:35:13,889 (trainer:732) INFO: 39epoch:train:9201-9300batch: iter_time=1.259e-04, forward_time=0.147, loss_ctc=78.149, loss_att=56.447, acc=0.718, loss=62.958, backward_time=1.033, grad_norm=115.095, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.741e-05, train_time=2.742 +[gpub007:0/64] 2023-07-12 00:37:31,478 (trainer:732) INFO: 39epoch:train:9301-9400batch: iter_time=1.239e-04, forward_time=0.146, loss_ctc=65.513, loss_att=54.281, acc=0.706, loss=57.651, backward_time=1.029, grad_norm=99.136, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.740e-05, train_time=2.752 +[gpub007:0/64] 2023-07-12 00:39:48,848 
(trainer:732) INFO: 39epoch:train:9401-9500batch: iter_time=1.150e-04, forward_time=0.147, loss_ctc=72.450, loss_att=67.505, acc=0.688, loss=68.988, backward_time=1.031, grad_norm=117.756, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.740e-05, train_time=2.747 +[gpub007:0/64] 2023-07-12 00:42:05,020 (trainer:732) INFO: 39epoch:train:9501-9600batch: iter_time=1.277e-04, forward_time=0.145, loss_ctc=69.448, loss_att=51.998, acc=0.725, loss=57.233, backward_time=1.027, grad_norm=113.463, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.739e-05, train_time=2.723 +[gpub007:0/64] 2023-07-12 00:44:21,146 (trainer:732) INFO: 39epoch:train:9601-9700batch: iter_time=1.146e-04, forward_time=0.146, loss_ctc=74.213, loss_att=52.124, acc=0.730, loss=58.751, backward_time=1.031, grad_norm=112.332, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.738e-05, train_time=2.722 +[gpub007:0/64] 2023-07-12 00:46:36,994 (trainer:732) INFO: 39epoch:train:9701-9800batch: iter_time=1.214e-04, forward_time=0.146, loss_ctc=67.094, loss_att=48.100, acc=0.715, loss=53.798, backward_time=1.028, grad_norm=99.421, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.737e-05, train_time=2.717 +[gpub007:0/64] 2023-07-12 00:48:53,521 (trainer:732) INFO: 39epoch:train:9801-9900batch: iter_time=1.104e-04, forward_time=0.145, loss_ctc=77.211, loss_att=58.814, acc=0.718, loss=64.333, backward_time=1.031, grad_norm=115.299, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.737e-05, train_time=2.730 +[gpub007:0/64] 2023-07-12 00:51:09,754 (trainer:732) INFO: 39epoch:train:9901-10000batch: iter_time=1.240e-04, forward_time=0.147, loss_ctc=75.077, loss_att=57.246, acc=0.722, loss=62.595, backward_time=1.030, grad_norm=115.911, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.736e-05, train_time=2.724 +[gpub007:0/64] 2023-07-12 01:05:55,834 (trainer:338) INFO: 39epoch results: [train] iter_time=0.163, forward_time=0.147, loss_ctc=72.756, loss_att=56.123, acc=0.707, loss=61.113, backward_time=1.031, grad_norm=118.372, clip=100.000, loss_scale=2.608e+32, optim_step_time=0.182, optim0_lr0=5.774e-05, train_time=3.347, time=4 hours, 39 minutes and 20.7 seconds, total_count=360000, gpu_max_cached_mem_GB=37.219, [valid] loss_ctc=44.378, cer_ctc=0.263, loss_att=37.628, acc=0.669, cer=0.434, wer=1.000, loss=39.653, time=8 minutes and 26.87 seconds, total_count=36938, gpu_max_cached_mem_GB=37.219, [att_plot] time=5 minutes and 55.9 seconds, total_count=0, gpu_max_cached_mem_GB=37.219 +[gpub007:0/64] 2023-07-12 01:06:13,306 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpub007:0/64] 2023-07-12 01:06:13,316 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/34epoch.pth +[gpub007:0/64] 2023-07-12 01:06:13,316 (trainer:272) INFO: 40/50epoch started. Estimated time to finish: 2 days, 5 hours and 35 minutes +[gpub007:0/64] 2023-07-12 01:06:13,891 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
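The end-of-epoch records above show the checkpoint bookkeeping: criteria that improved are announced ("The best model has been updated: valid.total_count"), and epoch snapshots that are no longer among the best under any retained criterion are deleted ("The model files were removed: ...34epoch.pth"). A simplified sketch of such keep-best pruning, assuming higher-is-better criteria and a hypothetical prune_checkpoints helper (not the espnet2 code):

import os

def prune_checkpoints(scores: dict, keep_best: int, ckpt_dir: str) -> None:
    # scores: {criterion: {epoch: value}}, higher is better for each criterion.
    keep = set()
    for per_epoch in scores.values():
        keep.update(sorted(per_epoch, key=per_epoch.get, reverse=True)[:keep_best])
    all_epochs = {e for per_epoch in scores.values() for e in per_epoch}
    for epoch in all_epochs - keep:
        path = os.path.join(ckpt_dir, f"{epoch}epoch.pth")
        if os.path.exists(path):
            os.remove(path)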
+[gpub007:0/64] 2023-07-12 01:06:32,113 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub007:0/64] 2023-07-12 01:06:35,520 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub007:0/64] 2023-07-12 01:06:35,520 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub007:0/64] 2023-07-12 01:06:35,527 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub007:0/64] 2023-07-12 01:13:09,997 (trainer:732) INFO: 40epoch:train:1-100batch: iter_time=2.741, forward_time=0.178, loss_ctc=61.185, loss_att=44.059, acc=0.696, loss=49.197, backward_time=1.040, grad_norm=120.778, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.184, optim0_lr0=5.735e-05, train_time=8.323 +[gpub007:0/64] 2023-07-12 01:15:26,329 (trainer:732) INFO: 40epoch:train:101-200batch: iter_time=1.114e-04, forward_time=0.144, loss_ctc=72.157, loss_att=57.240, acc=0.700, loss=61.715, backward_time=1.029, grad_norm=119.348, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.734e-05, train_time=2.727 +[gpub007:0/64] 2023-07-12 01:17:46,280 (trainer:732) INFO: 40epoch:train:201-300batch: iter_time=1.161e-04, forward_time=0.145, loss_ctc=81.039, loss_att=57.292, acc=0.717, loss=64.417, backward_time=1.028, grad_norm=165.528, clip=100.000, loss_scale=3.245e+32, optim_step_time=0.182, optim0_lr0=5.734e-05, train_time=2.799 +srun: Job step aborted: Waiting up to 32 seconds for job step to finish. 
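train.1.log ends with the SLURM step being aborted mid-epoch. Because every relaunch passes --resume true, each new job (including the train.5.log below) picks up from the latest checkpoint in the experiment directory rather than restarting from scratch, which is why one training run spans several train.N.log files. A minimal sketch of that resume logic, assuming a PyTorch-style checkpoint dict and the file name "checkpoint.pth" rather than the exact espnet2 format:

import os
import torch

def maybe_resume(model, optimizer, output_dir: str) -> int:
    # "checkpoint.pth" is the assumed rolling-checkpoint name here.
    path = os.path.join(output_dir, "checkpoint.pth")
    if not os.path.exists(path):
        return 0  # fresh run: start at epoch 0
    state = torch.load(path, map_location="cpu")
    model.load_state_dict(state["model"])
    optimizer.load_state_dict(state["optimizer"])
    return state["epoch"] + 1  # continue with the next epoch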
diff --git a/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.5.log b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.5.log new file mode 100644 index 0000000000000000000000000000000000000000..9bda1668159e277a7ef0b4685c57c08a8c5385a5 --- /dev/null +++ b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.5.log @@ -0,0 +1,4446 @@ +# Running on gpub005.delta.ncsa.illinois.edu +# Started at Fri Jul 7 20:05:18 CDT 2023 +# SLURMD_NODENAME=gpub005 +# SLURM_CLUSTER_NAME=delta +# SLURM_CONF=/var/spool/slurmd/conf-cache/slurm.conf +# SLURM_CPUS_ON_NODE=64 +# SLURM_CPUS_PER_TASK=64 +# SLURM_EXPORT_ENV=PATH +# SLURM_GET_USER_ENV=1 +# SLURM_GPUS_ON_NODE=4 +# SLURM_GTIDS=0 +# SLURM_JOBID=2138608 +# SLURM_JOB_ACCOUNT=bbjs-delta-gpu +# SLURM_JOB_CPUS_PER_NODE='64(x16)' +# SLURM_JOB_GID=202 +# SLURM_JOB_GPUS=0,1,2,3 +# SLURM_JOB_ID=2138608 +# SLURM_JOB_NAME=exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.log +# SLURM_JOB_NODELIST='gpub[005,012-014,018,030,039-041,067,072,084,095-098]' +# SLURM_JOB_NUM_NODES=16 +# SLURM_JOB_PARTITION=gpuA40x4 +# SLURM_JOB_QOS=bbjs-delta-gpu +# SLURM_JOB_UID=68077 +# SLURM_JOB_USER=peng6 +# SLURM_LOCALID=0 +# SLURM_MEM_PER_NODE=240000 +# SLURM_NNODES=16 +# SLURM_NODEID=0 +# SLURM_NODELIST='gpub[005,012-014,018,030,039-041,067,072,084,095-098]' +# SLURM_NODE_ALIASES='(null)' +# SLURM_OPEN_MODE=a +# SLURM_PRIO_PROCESS=0 +# SLURM_PROCID=0 +# SLURM_SUBMIT_DIR=/scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1 +# SLURM_SUBMIT_HOST=dt-login02.delta.internal.ncsa.edu +# SLURM_TASKS_PER_NODE='1(x16)' +# SLURM_TASK_PID=2408067 +# SLURM_TOPOLOGY_ADDR=ss00.ss09.gpub005 +# SLURM_TOPOLOGY_ADDR_PATTERN=switch.switch.node +# SLURM_WORKING_CLUSTER=delta:dt-sched:6817:9728:109 +# srun --export=ALL python3 -m espnet2.bin.s2t_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file 
exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_7cf88da0-31a7-4a7a-b755-938512feff6b +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz 
+[gpub005:0/64] 2023-07-07 20:08:24,356 (distributed_c10d:319) INFO: Added key: store_based_barrier_key:1 to store for rank: 0
+[gpub005:0/64] 2023-07-07 20:08:25,263 (distributed_c10d:353) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 64 nodes.
+[gpub005:0/64] 2023-07-07 20:08:25,288 (s2t:483) INFO: Vocabulary size: 50002
+[gpub005:0/64] 2023-07-07 20:08:40,904 (abs_task:1201) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True
+[gpub005:0/64] 2023-07-07 20:08:40,913 (abs_task:1202) INFO: Model structure:
+ESPnetS2TModel(
+  (frontend): DefaultFrontend(
+    (stft): Stft(n_fft=512, win_length=400, hop_length=160, center=True, normalized=False, onesided=True)
+    (frontend): Frontend()
+    (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False)
+  )
+  (specaug): SpecAug(
+    (freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq)
+    (time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=10, axis=time)
+  )
+  (normalize): GlobalMVN(stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz, norm_means=True, norm_vars=True)
+  (encoder): TransformerEncoder(
+    (embed): Conv2dSubsampling(
+      (conv): Sequential(
+        (0): Conv2d(1, 1024, kernel_size=(3, 3), stride=(2, 2))
+        (1): ReLU()
+        (2): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(2, 2))
+        (3): ReLU()
+      )
+      (out): Sequential(
+        (0): Linear(in_features=19456, out_features=1024, bias=True)
+        (1): PositionalEncoding(
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+      )
+    )
+    (encoders): MultiSequential(
+      (0): EncoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      [... (1) through (23): 23 further EncoderLayer blocks, identical in structure to (0), elided ...]
+    )
+    (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+  )
+  (decoder): TransformerDecoder(
+    (embed): Sequential(
+      (0): Embedding(50002, 1024)
+      (1): PositionalEncoding(
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+    )
+    (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+    (output_layer): Linear(in_features=1024, out_features=50002, bias=True)
+    (decoders): MultiSequential(
+      (0): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      [... (1) through (7): DecoderLayer blocks, identical in structure to (0), elided ...]
+      (8): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1):
LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (9): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (10): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (11): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + 
(w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (12): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (13): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (14): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): 
Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (15): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (16): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (17): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): 
Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (18): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (19): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (20): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): 
Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (21): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (22): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (23): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): 
Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+    )
+  )
+  (criterion_att): LabelSmoothingLoss(
+    (criterion): KLDivLoss()
+  )
+  (ctc): CTC(
+    (ctc_lo): Linear(in_features=1024, out_features=50002, bias=True)
+    (ctc_loss): CTCLoss()
+  )
+)
+
+Model summary:
+    Class Name: ESPnetS2TModel
+    Total Number of model parameters: 888.51 M
+    Number of trainable parameters: 888.51 M (100.0%)
+    Size: 3.55 GB
+    Type: torch.float32
+[gpub005:0/64] 2023-07-07 20:08:40,913 (abs_task:1205) INFO: Optimizer:
+AdamW (
+Parameter Group 0
+    amsgrad: False
+    betas: [0.9, 0.98]
+    capturable: False
+    eps: 1e-06
+    foreach: None
+    initial_lr: 0.00025
+    lr: 2.5e-08
+    maximize: False
+    weight_decay: 0.0
+)
+[gpub005:0/64] 2023-07-07 20:08:40,913 (abs_task:1206) INFO: Scheduler: WarmupLR(warmup_steps=10000)
+[gpub005:0/64] 2023-07-07 20:08:40,939 (abs_task:1215) INFO: Saving the configuration in exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/config.yaml
+[gpub005:0/64] 2023-07-07 20:08:41,640 (abs_task:1272) INFO: Loading pretrained params from /scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v2/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e18_d18_lr5e-4_warmup20k_raw_bpe50000/valid.acc.ave.pth
+[gpub005:0/64] 2023-07-07 20:08:50,082 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-07 20:08:50,311 (abs_task:1570) INFO: [valid] dataset:
+ESPnetDataset(
+  speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"}
+  text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"}
+  text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"}
+  text: {"path": "dump/raw/dev/text", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-07 20:08:50,311 (abs_task:1571) INFO: [valid] Batch sampler: UnsortedBatchSampler(N-batch=1012, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape,
+[gpub005:0/64] 2023-07-07 20:08:50,314 (abs_task:1572) INFO: [valid] mini-batch sizes summary: N-batch=1012, mean=128.1, min=128, max=129
+[gpub005:0/64] 2023-07-07 20:08:50,794 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-07 20:08:51,112 (abs_task:1570) INFO: [plot_att] dataset:
+ESPnetDataset(
+  speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"}
+  text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"}
+  text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"}
+  text: {"path": "dump/raw/dev/text", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-07 20:08:51,112 (abs_task:1571) INFO: [plot_att] Batch sampler: UnsortedBatchSampler(N-batch=129591, batch_size=1, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape,
+[gpub005:0/64] 2023-07-07 20:08:51,112 (abs_task:1572) INFO: [plot_att] mini-batch sizes summary: N-batch=3, mean=1.0, min=1, max=1
+[gpub005:0/64] 2023-07-07 20:09:20,037 (trainer:159) INFO: The training was resumed using exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/checkpoint.pth
+gpub005:2408151:2408151 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.105<0>
+gpub005:2408151:2408151 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub005:2408151:2408151 [0] NCCL INFO cudaDriverVersion 12010
+NCCL version 2.14.3+cuda11.7
+[gpub005:0/64] 2023-07-07 20:09:25,530 (trainer:284) INFO: 23/30epoch started
+[gpub005:0/64] 2023-07-07 20:09:25,594 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub005:0/64] 2023-07-07 20:09:43,306 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-07 20:09:46,752 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-07 20:09:46,752 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub005:0/64] 2023-07-07 20:09:46,758 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+gpub005:2408154:2408154 [3] NCCL INFO cudaDriverVersion 12010
+gpub005:2408154:2408154 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.105<0>
+gpub005:2408154:2408154 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub005:2408154:2408227 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.105<0>
+gpub005:2408154:2408227 [3] NCCL INFO Using network IB
+gpub005:2408154:2408227 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub005:2408154:2408227 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2
+gpub005:2408154:2408227 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0
+gpub005:2408154:2408227 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0
+gpub005:2408154:2408227 [3] NCCL INFO Connected all rings
+gpub005:2408154:2408227 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 2[85000] via P2P/IPC
+gpub005:2408154:2408227 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 2[85000] via P2P/IPC
+gpub005:2408154:2408227 [3] NCCL INFO Connected all trees
+gpub005:2408154:2408227 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub005:2408154:2408227 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub005:2408154:2408227 [3] NCCL INFO comm 0x519a6580 rank 3 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub005:2408151:2408226 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.105<0>
+gpub005:2408151:2408226 [0] NCCL INFO Using network IB
+gpub005:2408151:2408226 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub005:2408151:2408226 [0] NCCL INFO Channel 00/02 : 0 1 2 3 4 5 6 7 8 9 10
11 12 13 14 15 16 17 18 19 +gpub005:2408151:2408226 [0] NCCL INFO Channel 01/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpub005:2408151:2408226 [0] NCCL INFO Trees [0] 1/32/-1->0->-1 [1] 1/-1/-1->0->4 +gpub005:2408151:2408226 [0] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0 +gpub005:2408151:2408226 [0] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0 +gpub005:2408151:2408226 [0] NCCL INFO Channel 00/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub005:2408151:2408226 [0] NCCL INFO Channel 01/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub005:2408151:2408226 [0] NCCL INFO Connected all rings +gpub005:2408151:2408226 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [send] via NET/IB/0 +gpub005:2408151:2408226 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [receive] via NET/IB/0 +gpub005:2408151:2408226 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [send] via NET/IB/0 +gpub005:2408151:2408226 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [receive] via NET/IB/0 +gpub005:2408151:2408226 [0] NCCL INFO Connected all trees +gpub005:2408151:2408226 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub005:2408151:2408226 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub005:2408151:2408226 [0] NCCL INFO comm 0x8dda0850 rank 0 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub005:2408152:2408152 [1] NCCL INFO cudaDriverVersion 12010 +gpub005:2408152:2408152 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.105<0> +gpub005:2408152:2408152 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub005:2408152:2408228 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.105<0> +gpub005:2408152:2408228 [1] NCCL INFO Using network IB +gpub005:2408152:2408228 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub005:2408152:2408228 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0 +gpub005:2408152:2408228 [1] NCCL INFO Channel 00/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub005:2408152:2408228 [1] NCCL INFO Channel 01/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub005:2408152:2408228 [1] NCCL INFO Connected all rings +gpub005:2408152:2408228 [1] NCCL INFO Channel 00/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub005:2408152:2408228 [1] NCCL INFO Channel 01/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub005:2408152:2408228 [1] NCCL INFO Connected all trees +gpub005:2408152:2408228 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub005:2408152:2408228 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub005:2408152:2408228 [1] NCCL INFO comm 0x50e7e140 rank 1 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub067:1574054:1574054 [0] NCCL INFO cudaDriverVersion 12010 +gpub067:1574054:1574054 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.167<0> +gpub067:1574054:1574054 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub067:1574054:1574131 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.167<0> +gpub067:1574054:1574131 [0] NCCL INFO Using network IB +gpub067:1574054:1574131 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub067:1574054:1574131 [0] NCCL INFO Trees [0] 37/-1/-1->36->41 [1] 37/32/-1->36->44 +gpub067:1574054:1574131 [0] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0 +gpub067:1574054:1574131 [0] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0 +gpub067:1574054:1574131 [0] NCCL INFO Channel 00/0 : 
36[7000] -> 37[46000] via P2P/IPC +gpub067:1574054:1574131 [0] NCCL INFO Channel 01/0 : 36[7000] -> 37[46000] via P2P/IPC +gpub067:1574054:1574131 [0] NCCL INFO Connected all rings +gpub067:1574054:1574131 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [receive] via NET/IB/0 +gpub067:1574054:1574131 [0] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [send] via NET/IB/0 +gpub067:1574054:1574131 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [send] via NET/IB/0 +gpub067:1574054:1574131 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [receive] via NET/IB/0 +gpub067:1574054:1574131 [0] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [receive] via NET/IB/0 +gpub067:1574054:1574131 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [send] via NET/IB/0 +gpub067:1574054:1574131 [0] NCCL INFO Connected all trees +gpub067:1574054:1574131 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub067:1574054:1574131 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub067:1574054:1574131 [0] NCCL INFO comm 0x4f342150 rank 36 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub095:2520061:2520061 [2] NCCL INFO cudaDriverVersion 12010 +gpub095:2520061:2520061 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.195<0> +gpub095:2520061:2520061 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub095:2520061:2520137 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.195<0> +gpub095:2520061:2520137 [2] NCCL INFO Using network IB +gpub095:2520061:2520137 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub095:2520061:2520137 [2] NCCL INFO Trees [0] 51/-1/-1->50->49 [1] 51/-1/-1->50->49 +gpub095:2520061:2520137 [2] NCCL INFO Channel 00/0 : 50[85000] -> 51[c7000] via P2P/IPC +gpub095:2520061:2520137 [2] NCCL INFO Channel 01/0 : 50[85000] -> 51[c7000] via P2P/IPC +gpub095:2520061:2520137 [2] NCCL INFO Connected all rings +gpub095:2520061:2520137 [2] NCCL INFO Channel 00/0 : 50[85000] -> 49[46000] via P2P/IPC +gpub095:2520061:2520137 [2] NCCL INFO Channel 01/0 : 50[85000] -> 49[46000] via P2P/IPC +gpub095:2520061:2520137 [2] NCCL INFO Connected all trees +gpub095:2520061:2520137 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub095:2520061:2520137 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub095:2520061:2520137 [2] NCCL INFO comm 0x91b7930 rank 50 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub013:1694053:1694053 [0] NCCL INFO cudaDriverVersion 12010 +gpub013:1694053:1694053 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.113<0> +gpub013:1694053:1694053 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub013:1694053:1694130 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.113<0> +gpub013:1694053:1694130 [0] NCCL INFO Using network IB +gpub013:1694053:1694130 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub013:1694053:1694130 [0] NCCL INFO Trees [0] 9/12/-1->8->17 [1] 9/-1/-1->8->5 +gpub013:1694053:1694130 [0] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0 +gpub013:1694053:1694130 [0] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0 +gpub013:1694053:1694130 [0] NCCL INFO Channel 00/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub013:1694053:1694130 [0] NCCL INFO Channel 01/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub013:1694053:1694130 [0] NCCL INFO Connected all rings +gpub005:2408153:2408153 [2] NCCL INFO cudaDriverVersion 12010 
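
The "888.51 M" figure in the model summary above can be largely recovered from the architecture dump itself. The following back-of-envelope script is an editorial illustration, not part of the log; the sizes are read off the dump (d_model=1024, d_ff=4096, 24 encoder and 24 decoder layers, vocab 50002):

    # Rough parameter count from the printed module tree (illustrative sketch).
    d, ff, vocab = 1024, 4096, 50002
    attn = 4 * (d * d + d)                  # linear_q/k/v/out, each with bias
    ffn = (d * ff + ff) + (ff * d + d)      # w_1 + w_2
    ln = 2 * d                              # LayerNorm weight + bias
    enc_layer = attn + ffn + 2 * ln         # self_attn + feed_forward + norm1/2
    dec_layer = 2 * attn + ffn + 3 * ln     # self_attn + src_attn + feed_forward + norm1/2/3
    embed = vocab * d                       # decoder Embedding(50002, 1024)
    out_layer = d * vocab + vocab           # decoder output_layer
    ctc_lo = d * vocab + vocab              # CTC projection
    total = 24 * enc_layer + 24 * dec_layer + embed + out_layer + ctc_lo
    print(f"{total / 1e6:.2f} M")           # ~859.14 M

The remaining roughly 29 M presumably sits in the Conv2d subsampling frontend (printed earlier in the dump, before this excerpt) and the few norms not counted here.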
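The optimizer dump above reports lr: 2.5e-08 against initial_lr: 0.00025, which is exactly step 1 of the WarmupLR(warmup_steps=10000) schedule. A minimal sketch, assuming ESPnet's usual Noam-style warmup rule (not copied from this repository):

    # Assumed WarmupLR rule:
    #   lr(step) = base_lr * warmup_steps**0.5 * min(step**-0.5, step * warmup_steps**-1.5)
    def warmup_lr(step: int, base_lr: float = 2.5e-4, warmup_steps: int = 10000) -> float:
        return base_lr * warmup_steps**0.5 * min(step**-0.5, step * warmup_steps**-1.5)

    print(warmup_lr(1))       # 2.5e-08, matching the "lr: 2.5e-08" in the optimizer dump
    print(warmup_lr(10_000))  # 2.5e-04, the peak (initial_lr) at the end of warmup
    print(warmup_lr(40_000))  # 1.25e-04, decaying as step**-0.5 afterwards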
+gpub005:2408153:2408153 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.105<0> +gpub005:2408153:2408153 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub005:2408153:2408229 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.105<0> +gpub005:2408153:2408229 [2] NCCL INFO Using network IB +gpub005:2408153:2408229 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub005:2408153:2408229 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1 +gpub005:2408153:2408229 [2] NCCL INFO Channel 00/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub005:2408153:2408229 [2] NCCL INFO Channel 01/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub005:2408153:2408229 [2] NCCL INFO Connected all rings +gpub005:2408153:2408229 [2] NCCL INFO Channel 00/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub005:2408153:2408229 [2] NCCL INFO Channel 01/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub013:1694053:1694130 [0] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [receive] via NET/IB/0 +gpub013:1694053:1694130 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [send] via NET/IB/0 +gpub013:1694053:1694130 [0] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [send] via NET/IB/0 +gpub013:1694053:1694130 [0] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [receive] via NET/IB/0 +gpub013:1694053:1694130 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [receive] via NET/IB/0 +gpub013:1694053:1694130 [0] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [send] via NET/IB/0 +gpub013:1694053:1694130 [0] NCCL INFO Connected all trees +gpub013:1694053:1694130 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub013:1694053:1694130 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub013:1694053:1694130 [0] NCCL INFO comm 0x8c6ae750 rank 8 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub005:2408153:2408229 [2] NCCL INFO Connected all trees +gpub005:2408153:2408229 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub005:2408153:2408229 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub005:2408153:2408229 [2] NCCL INFO comm 0x4fab6870 rank 2 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub013:1694054:1694054 [1] NCCL INFO cudaDriverVersion 12010 +gpub013:1694054:1694054 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.113<0> +gpub013:1694054:1694054 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub013:1694054:1694131 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.113<0> +gpub013:1694054:1694131 [1] NCCL INFO Using network IB +gpub013:1694054:1694131 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub013:1694054:1694131 [1] NCCL INFO Trees [0] 10/4/-1->9->8 [1] 10/-1/-1->9->8 +gpub013:1694054:1694131 [1] NCCL INFO Channel 00/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub013:1694054:1694131 [1] NCCL INFO Channel 01/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub013:1694054:1694131 [1] NCCL INFO Connected all rings +gpub013:1694054:1694131 [1] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [receive] via NET/IB/0 +gpub013:1694054:1694131 [1] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [send] via NET/IB/0 +gpub013:1694054:1694131 [1] NCCL INFO Channel 00/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub013:1694054:1694131 [1] NCCL INFO Channel 01/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub013:1694054:1694131 [1] NCCL INFO Connected all trees +gpub013:1694054:1694131 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub013:1694054:1694131 [1] NCCL 
INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub013:1694054:1694131 [1] NCCL INFO comm 0x5088d590 rank 9 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub013:1694055:1694055 [2] NCCL INFO cudaDriverVersion 12010 +gpub013:1694055:1694055 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.113<0> +gpub013:1694055:1694055 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub013:1694055:1694128 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.113<0> +gpub013:1694055:1694128 [2] NCCL INFO Using network IB +gpub013:1694055:1694128 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub013:1694055:1694128 [2] NCCL INFO Trees [0] 11/-1/-1->10->9 [1] 11/-1/-1->10->9 +gpub013:1694055:1694128 [2] NCCL INFO Channel 00/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub013:1694055:1694128 [2] NCCL INFO Channel 01/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub013:1694055:1694128 [2] NCCL INFO Connected all rings +gpub013:1694055:1694128 [2] NCCL INFO Channel 00/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub013:1694055:1694128 [2] NCCL INFO Channel 01/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub013:1694055:1694128 [2] NCCL INFO Connected all trees +gpub013:1694055:1694128 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub013:1694055:1694128 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub013:1694055:1694128 [2] NCCL INFO comm 0xf6b9b10 rank 10 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub067:1574055:1574055 [1] NCCL INFO cudaDriverVersion 12010 +gpub067:1574055:1574055 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.167<0> +gpub067:1574055:1574055 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub067:1574055:1574134 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.167<0> +gpub067:1574055:1574134 [1] NCCL INFO Using network IB +gpub067:1574055:1574134 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub067:1574055:1574134 [1] NCCL INFO Trees [0] 38/-1/-1->37->36 [1] 38/40/-1->37->36 +gpub067:1574055:1574134 [1] NCCL INFO Channel 00/0 : 37[46000] -> 38[85000] via P2P/IPC +gpub067:1574055:1574134 [1] NCCL INFO Channel 01/0 : 37[46000] -> 38[85000] via P2P/IPC +gpub067:1574055:1574134 [1] NCCL INFO Connected all rings +gpub067:1574055:1574134 [1] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [send] via NET/IB/0 +gpub067:1574055:1574134 [1] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [receive] via NET/IB/0 +gpub067:1574055:1574134 [1] NCCL INFO Channel 00/0 : 37[46000] -> 36[7000] via P2P/IPC +gpub067:1574055:1574134 [1] NCCL INFO Channel 01/0 : 37[46000] -> 36[7000] via P2P/IPC +gpub067:1574055:1574134 [1] NCCL INFO Connected all trees +gpub067:1574055:1574134 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub067:1574055:1574134 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub067:1574055:1574134 [1] NCCL INFO comm 0x509b90f0 rank 37 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub095:2520062:2520062 [3] NCCL INFO cudaDriverVersion 12010 +gpub095:2520062:2520062 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.195<0> +gpub095:2520062:2520062 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub095:2520062:2520138 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.195<0> +gpub095:2520062:2520138 [3] NCCL INFO Using network IB +gpub095:2520062:2520138 [3] NCCL INFO Setting affinity for GPU 3 to ffff 
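
The blocks of NCCL INFO lines throughout this log (Bootstrap, Trees, ring/tree channel setup, P2P/IPC within a node, NET/IB across nodes, Init COMPLETE per rank) are NCCL's debug output during process-group initialization for the 64 ranks. A minimal, illustrative way to reproduce this kind of output on any PyTorch + NCCL setup; this is not the exact launch used here:

    # Illustrative sketch: NCCL_DEBUG=INFO makes NCCL print the Bootstrap/Trees/
    # Channel/Init COMPLETE lines seen in this log when the first collective runs.
    import os
    import torch
    import torch.distributed as dist

    os.environ.setdefault("NCCL_DEBUG", "INFO")
    # Assumes RANK/WORLD_SIZE/MASTER_ADDR/MASTER_PORT are set by the launcher.
    dist.init_process_group(backend="nccl", init_method="env://")
    torch.cuda.set_device(dist.get_rank() % torch.cuda.device_count())
    x = torch.ones(1, device="cuda")
    dist.all_reduce(x)  # first collective triggers the ring/tree topology lines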
+gpub095:2520062:2520138 [3] NCCL INFO Trees [0] -1/-1/-1->51->50 [1] -1/-1/-1->51->50 +gpub095:2520062:2520138 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0 +gpub095:2520062:2520138 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0 +gpub095:2520062:2520138 [3] NCCL INFO Connected all rings +gpub095:2520062:2520138 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 50[85000] via P2P/IPC +gpub095:2520062:2520138 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 50[85000] via P2P/IPC +gpub095:2520062:2520138 [3] NCCL INFO Connected all trees +gpub095:2520062:2520138 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub095:2520062:2520138 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub095:2520062:2520138 [3] NCCL INFO comm 0x8c7104c0 rank 51 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub095:2520059:2520059 [0] NCCL INFO cudaDriverVersion 12010 +gpub095:2520059:2520059 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.195<0> +gpub095:2520059:2520059 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub095:2520059:2520136 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.195<0> +gpub095:2520059:2520136 [0] NCCL INFO Using network IB +gpub095:2520059:2520136 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub095:2520059:2520136 [0] NCCL INFO Trees [0] 49/56/-1->48->32 [1] 49/-1/-1->48->52 +gpub095:2520059:2520136 [0] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0 +gpub095:2520059:2520136 [0] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0 +gpub095:2520059:2520136 [0] NCCL INFO Channel 00/0 : 48[7000] -> 49[46000] via P2P/IPC +gpub095:2520059:2520136 [0] NCCL INFO Channel 01/0 : 48[7000] -> 49[46000] via P2P/IPC +gpub095:2520059:2520136 [0] NCCL INFO Connected all rings +gpub095:2520059:2520136 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [send] via NET/IB/0 +gpub095:2520059:2520136 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [send] via NET/IB/0 +gpub095:2520059:2520136 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [receive] via NET/IB/0 +gpub095:2520059:2520136 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [send] via NET/IB/0 +gpub095:2520059:2520136 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [receive] via NET/IB/0 +gpub095:2520059:2520136 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [receive] via NET/IB/0 +gpub095:2520059:2520136 [0] NCCL INFO Connected all trees +gpub095:2520059:2520136 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub095:2520059:2520136 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub095:2520059:2520136 [0] NCCL INFO comm 0x15bb1c50 rank 48 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub095:2520060:2520060 [1] NCCL INFO cudaDriverVersion 12010 +gpub095:2520060:2520060 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.195<0> +gpub095:2520060:2520060 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub095:2520060:2520135 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.195<0> +gpub095:2520060:2520135 [1] NCCL INFO Using network IB +gpub095:2520060:2520135 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub095:2520060:2520135 [1] NCCL INFO Trees [0] 50/40/-1->49->48 [1] 50/-1/-1->49->48 +gpub095:2520060:2520135 [1] NCCL INFO Channel 00/0 : 49[46000] -> 50[85000] via P2P/IPC +gpub095:2520060:2520135 [1] NCCL INFO Channel 01/0 : 
49[46000] -> 50[85000] via P2P/IPC +gpub095:2520060:2520135 [1] NCCL INFO Connected all rings +gpub095:2520060:2520135 [1] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [receive] via NET/IB/0 +gpub095:2520060:2520135 [1] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [send] via NET/IB/0 +gpub095:2520060:2520135 [1] NCCL INFO Channel 00/0 : 49[46000] -> 48[7000] via P2P/IPC +gpub095:2520060:2520135 [1] NCCL INFO Channel 01/0 : 49[46000] -> 48[7000] via P2P/IPC +gpub095:2520060:2520135 [1] NCCL INFO Connected all trees +gpub095:2520060:2520135 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub095:2520060:2520135 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub095:2520060:2520135 [1] NCCL INFO comm 0xb4653490 rank 49 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub098:1875739:1875739 [1] NCCL INFO cudaDriverVersion 12010 +gpub098:1875739:1875739 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.198<0> +gpub098:1875739:1875739 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub098:1875739:1875807 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.198<0> +gpub098:1875739:1875807 [1] NCCL INFO Using network IB +gpub098:1875739:1875807 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub098:1875739:1875807 [1] NCCL INFO Trees [0] 62/-1/-1->61->60 [1] 62/-1/-1->61->60 +gpub098:1875739:1875807 [1] NCCL INFO Channel 00/0 : 61[46000] -> 62[85000] via P2P/IPC +gpub098:1875739:1875807 [1] NCCL INFO Channel 01/0 : 61[46000] -> 62[85000] via P2P/IPC +gpub098:1875739:1875807 [1] NCCL INFO Connected all rings +gpub098:1875739:1875807 [1] NCCL INFO Channel 00/0 : 61[46000] -> 60[7000] via P2P/IPC +gpub098:1875739:1875807 [1] NCCL INFO Channel 01/0 : 61[46000] -> 60[7000] via P2P/IPC +gpub098:1875739:1875807 [1] NCCL INFO Connected all trees +gpub098:1875739:1875807 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub098:1875739:1875807 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub098:1875739:1875807 [1] NCCL INFO comm 0x4ffeee90 rank 61 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub098:1875738:1875738 [0] NCCL INFO cudaDriverVersion 12010 +gpub098:1875738:1875738 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.198<0> +gpub098:1875738:1875738 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub098:1875738:1875809 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.198<0> +gpub098:1875738:1875809 [0] NCCL INFO Using network IB +gpub098:1875738:1875809 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub098:1875738:1875809 [0] NCCL INFO Trees [0] 61/-1/-1->60->56 [1] 61/28/-1->60->-1 +gpub098:1875738:1875809 [0] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0 +gpub098:1875738:1875809 [0] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0 +gpub098:1875738:1875809 [0] NCCL INFO Channel 00/0 : 60[7000] -> 61[46000] via P2P/IPC +gpub098:1875738:1875809 [0] NCCL INFO Channel 01/0 : 60[7000] -> 61[46000] via P2P/IPC +gpub098:1875738:1875809 [0] NCCL INFO Connected all rings +gpub098:1875738:1875809 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [receive] via NET/IB/0 +gpub098:1875738:1875809 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [receive] via NET/IB/0 +gpub098:1875738:1875809 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [send] via NET/IB/0 +gpub098:1875738:1875809 [0] NCCL INFO Channel 00/0 : 60[7000] -> 
56[7000] [send] via NET/IB/0 +gpub098:1875738:1875809 [0] NCCL INFO Connected all trees +gpub098:1875738:1875809 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub098:1875738:1875809 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub098:1875738:1875809 [0] NCCL INFO comm 0x9e5ca730 rank 60 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub098:1875740:1875740 [2] NCCL INFO cudaDriverVersion 12010 +gpub098:1875740:1875740 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.198<0> +gpub098:1875740:1875740 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub098:1875740:1875808 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.198<0> +gpub098:1875740:1875808 [2] NCCL INFO Using network IB +gpub098:1875740:1875808 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub098:1875740:1875808 [2] NCCL INFO Trees [0] 63/-1/-1->62->61 [1] 63/-1/-1->62->61 +gpub098:1875740:1875808 [2] NCCL INFO Channel 00/0 : 62[85000] -> 63[c7000] via P2P/IPC +gpub098:1875740:1875808 [2] NCCL INFO Channel 01/0 : 62[85000] -> 63[c7000] via P2P/IPC +gpub098:1875740:1875808 [2] NCCL INFO Connected all rings +gpub098:1875740:1875808 [2] NCCL INFO Channel 00/0 : 62[85000] -> 61[46000] via P2P/IPC +gpub098:1875740:1875808 [2] NCCL INFO Channel 01/0 : 62[85000] -> 61[46000] via P2P/IPC +gpub098:1875740:1875808 [2] NCCL INFO Connected all trees +gpub098:1875740:1875808 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub098:1875740:1875808 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub098:1875740:1875808 [2] NCCL INFO comm 0x8c9fbb0 rank 62 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub084:4052709:4052709 [1] NCCL INFO cudaDriverVersion 12010 +gpub084:4052709:4052709 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.184<0> +gpub084:4052709:4052709 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub084:4052709:4052793 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.184<0> +gpub084:4052709:4052793 [1] NCCL INFO Using network IB +gpub084:4052709:4052793 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub084:4052709:4052793 [1] NCCL INFO Trees [0] 46/-1/-1->45->44 [1] 46/52/-1->45->44 +gpub084:4052709:4052793 [1] NCCL INFO Channel 00/0 : 45[46000] -> 46[85000] via P2P/IPC +gpub084:4052709:4052793 [1] NCCL INFO Channel 01/0 : 45[46000] -> 46[85000] via P2P/IPC +gpub084:4052709:4052793 [1] NCCL INFO Connected all rings +gpub084:4052709:4052793 [1] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [send] via NET/IB/0 +gpub084:4052709:4052793 [1] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [receive] via NET/IB/0 +gpub072:1805521:1805521 [2] NCCL INFO cudaDriverVersion 12010 +gpub072:1805521:1805521 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.172<0> +gpub072:1805521:1805521 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub072:1805521:1805605 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.172<0> +gpub072:1805521:1805605 [2] NCCL INFO Using network IB +gpub072:1805521:1805605 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub072:1805521:1805605 [2] NCCL INFO Trees [0] 43/-1/-1->42->41 [1] 43/-1/-1->42->41 +gpub072:1805521:1805605 [2] NCCL INFO Channel 00/0 : 42[85000] -> 43[c7000] via P2P/IPC +gpub072:1805521:1805605 [2] NCCL INFO Channel 01/0 : 42[85000] -> 43[c7000] via P2P/IPC +gpub072:1805521:1805605 [2] NCCL INFO 
Connected all rings
+gpub072:1805521:1805605 [2] NCCL INFO Channel 00/0 : 42[85000] -> 41[46000] via P2P/IPC
+gpub072:1805521:1805605 [2] NCCL INFO Channel 01/0 : 42[85000] -> 41[46000] via P2P/IPC
+gpub084:4052709:4052793 [1] NCCL INFO Channel 00/0 : 45[46000] -> 44[7000] via P2P/IPC
+gpub084:4052709:4052793 [1] NCCL INFO Channel 01/0 : 45[46000] -> 44[7000] via P2P/IPC
+gpub084:4052709:4052793 [1] NCCL INFO Connected all trees
+gpub084:4052709:4052793 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub084:4052709:4052793 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub084:4052709:4052793 [1] NCCL INFO comm 0xd834420 rank 45 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub072:1805521:1805605 [2] NCCL INFO Connected all trees
+gpub072:1805521:1805605 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub072:1805521:1805605 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub072:1805521:1805605 [2] NCCL INFO comm 0x8d829e60 rank 42 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub067:1574056:1574056 [2] NCCL INFO cudaDriverVersion 12010
+gpub067:1574056:1574056 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.167<0>
+gpub067:1574056:1574056 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub067:1574056:1574133 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.167<0>
+gpub067:1574056:1574133 [2] NCCL INFO Using network IB
+gpub067:1574056:1574133 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub067:1574056:1574133 [2] NCCL INFO Trees [0] 39/-1/-1->38->37 [1] 39/-1/-1->38->37
+gpub067:1574056:1574133 [2] NCCL INFO Channel 00/0 : 38[85000] -> 39[c7000] via P2P/IPC
+gpub067:1574056:1574133 [2] NCCL INFO Channel 01/0 : 38[85000] -> 39[c7000] via P2P/IPC
+gpub067:1574056:1574133 [2] NCCL INFO Connected all rings
+gpub067:1574056:1574133 [2] NCCL INFO Channel 00/0 : 38[85000] -> 37[46000] via P2P/IPC
+gpub067:1574056:1574133 [2] NCCL INFO Channel 01/0 : 38[85000] -> 37[46000] via P2P/IPC
+gpub067:1574056:1574133 [2] NCCL INFO Connected all trees
+gpub067:1574056:1574133 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub067:1574056:1574133 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub067:1574056:1574133 [2] NCCL INFO comm 0xb006d7d0 rank 38 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub013:1694056:1694056 [3] NCCL INFO cudaDriverVersion 12010
+gpub013:1694056:1694056 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.113<0>
+gpub013:1694056:1694056 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub013:1694056:1694129 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.113<0>
+gpub013:1694056:1694129 [3] NCCL INFO Using network IB
+gpub013:1694056:1694129 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub013:1694056:1694129 [3] NCCL INFO Trees [0] -1/-1/-1->11->10 [1] -1/-1/-1->11->10
+gpub013:1694056:1694129 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0
+gpub013:1694056:1694129 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0
+gpub013:1694056:1694129 [3] NCCL INFO Connected all rings
+gpub013:1694056:1694129 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 10[85000] via P2P/IPC
+gpub013:1694056:1694129 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 10[85000] via P2P/IPC
+gpub013:1694056:1694129 [3] NCCL INFO Connected all trees
+gpub013:1694056:1694129 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub013:1694056:1694129 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub013:1694056:1694129 [3] NCCL INFO comm 0x8c00090 rank 11 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub040:2093690:2093690 [0] NCCL INFO cudaDriverVersion 12010
+gpub040:2093690:2093690 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.140<0>
+gpub040:2093690:2093690 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub040:2093690:2093772 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.140<0>
+gpub040:2093690:2093772 [0] NCCL INFO Using network IB
+gpub040:2093690:2093772 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub040:2093690:2093772 [0] NCCL INFO Trees [0] 29/-1/-1->28->24 [1] 29/12/-1->28->60
+gpub040:2093690:2093772 [0] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0
+gpub040:2093690:2093772 [0] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0
+gpub040:2093690:2093772 [0] NCCL INFO Channel 00/0 : 28[7000] -> 29[46000] via P2P/IPC
+gpub040:2093690:2093772 [0] NCCL INFO Channel 01/0 : 28[7000] -> 29[46000] via P2P/IPC
+gpub040:2093690:2093772 [0] NCCL INFO Connected all rings
+gpub040:2093690:2093772 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [receive] via NET/IB/0
+gpub040:2093690:2093772 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [receive] via NET/IB/0
+gpub040:2093690:2093772 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [receive] via NET/IB/0
+gpub040:2093690:2093772 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [send] via NET/IB/0
+gpub040:2093690:2093772 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [send] via NET/IB/0
+gpub040:2093690:2093772 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [send] via NET/IB/0
+gpub040:2093690:2093772 [0] NCCL INFO Connected all trees
+gpub040:2093690:2093772 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub040:2093690:2093772 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub040:2093690:2093772 [0] NCCL INFO comm 0xba9dc4d0 rank 28 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub014:1495255:1495255 [1] NCCL INFO cudaDriverVersion 12010
+gpub014:1495255:1495255 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.114<0>
+gpub014:1495255:1495255 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub014:1495255:1495331 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.114<0>
+gpub014:1495255:1495331 [1] NCCL INFO Using network IB
+gpub014:1495255:1495331 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub014:1495255:1495331 [1] NCCL INFO Trees [0] 14/-1/-1->13->12 [1] 14/20/-1->13->12
+gpub014:1495255:1495331 [1] NCCL INFO Channel 00/0 : 13[46000] -> 14[85000] via P2P/IPC
+gpub014:1495255:1495331 [1] NCCL INFO Channel 01/0 : 13[46000] -> 14[85000] via P2P/IPC
+gpub014:1495255:1495331 [1] NCCL INFO Connected all rings
+gpub014:1495255:1495331 [1] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [send] via NET/IB/0
+gpub014:1495255:1495331 [1] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [receive] via NET/IB/0
+gpub014:1495255:1495331 [1] NCCL INFO Channel 00/0 : 13[46000] -> 12[7000] via P2P/IPC
+gpub014:1495255:1495331 [1] NCCL INFO Channel 01/0 : 13[46000] -> 12[7000] via P2P/IPC
+gpub014:1495255:1495331 [1] NCCL INFO Connected all trees
+gpub014:1495255:1495331 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub014:1495255:1495331 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub014:1495255:1495331 [1] NCCL INFO comm 0x515d3c50 rank 13 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub098:1875741:1875741 [3] NCCL INFO cudaDriverVersion 12010
+gpub098:1875741:1875741 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.198<0>
+gpub098:1875741:1875741 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub098:1875741:1875810 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.198<0>
+gpub098:1875741:1875810 [3] NCCL INFO Using network IB
+gpub098:1875741:1875810 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub098:1875741:1875810 [3] NCCL INFO Trees [0] -1/-1/-1->63->62 [1] -1/-1/-1->63->62
+gpub098:1875741:1875810 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0
+gpub098:1875741:1875810 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0
+gpub098:1875741:1875810 [3] NCCL INFO Connected all rings
+gpub098:1875741:1875810 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 62[85000] via P2P/IPC
+gpub098:1875741:1875810 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 62[85000] via P2P/IPC
+gpub098:1875741:1875810 [3] NCCL INFO Connected all trees
+gpub098:1875741:1875810 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub098:1875741:1875810 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub098:1875741:1875810 [3] NCCL INFO comm 0x4ecd4ee0 rank 63 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub072:1805520:1805520 [1] NCCL INFO cudaDriverVersion 12010
+gpub072:1805520:1805520 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.172<0>
+gpub072:1805520:1805520 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub072:1805520:1805604 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.172<0>
+gpub072:1805520:1805604 [1] NCCL INFO Using network IB
+gpub072:1805520:1805604 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub072:1805520:1805604 [1] NCCL INFO Trees [0] 42/36/-1->41->40 [1] 42/-1/-1->41->40
+gpub072:1805520:1805604 [1] NCCL INFO Channel 00/0 : 41[46000] -> 42[85000] via P2P/IPC
+gpub072:1805520:1805604 [1] NCCL INFO Channel 01/0 : 41[46000] -> 42[85000] via P2P/IPC
+gpub072:1805520:1805604 [1] NCCL INFO Connected all rings
+gpub072:1805520:1805604 [1] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [receive] via NET/IB/0
+gpub072:1805520:1805604 [1] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [send] via NET/IB/0
+gpub072:1805520:1805604 [1] NCCL INFO Channel 00/0 : 41[46000] -> 40[7000] via P2P/IPC
+gpub072:1805520:1805604 [1] NCCL INFO Channel 01/0 : 41[46000] -> 40[7000] via P2P/IPC
+gpub072:1805520:1805604 [1] NCCL INFO Connected all trees
+gpub072:1805520:1805604 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub072:1805520:1805604 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub072:1805520:1805604 [1] NCCL INFO comm 0xb6f41780 rank 41 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub067:1574057:1574057 [3] NCCL INFO cudaDriverVersion 12010
+gpub067:1574057:1574057 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.167<0>
+gpub067:1574057:1574057 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub067:1574057:1574132 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.167<0>
+gpub067:1574057:1574132 [3] NCCL INFO Using network IB
+gpub067:1574057:1574132 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub067:1574057:1574132 [3] NCCL INFO Trees [0] -1/-1/-1->39->38 [1] -1/-1/-1->39->38
+gpub067:1574057:1574132 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0
+gpub067:1574057:1574132 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0
+gpub067:1574057:1574132 [3] NCCL INFO Connected all rings
+gpub067:1574057:1574132 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 38[85000] via P2P/IPC
+gpub067:1574057:1574132 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 38[85000] via P2P/IPC
+gpub067:1574057:1574132 [3] NCCL INFO Connected all trees
+gpub067:1574057:1574132 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub067:1574057:1574132 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub067:1574057:1574132 [3] NCCL INFO comm 0x8d973650 rank 39 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub014:1495257:1495257 [3] NCCL INFO cudaDriverVersion 12010
+gpub014:1495257:1495257 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.114<0>
+gpub014:1495257:1495257 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub014:1495257:1495328 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.114<0>
+gpub014:1495257:1495328 [3] NCCL INFO Using network IB
+gpub014:1495257:1495328 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub014:1495257:1495328 [3] NCCL INFO Trees [0] -1/-1/-1->15->14 [1] -1/-1/-1->15->14
+gpub014:1495257:1495328 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0
+gpub014:1495257:1495328 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0
+gpub014:1495257:1495328 [3] NCCL INFO Connected all rings
+gpub014:1495257:1495328 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 14[85000] via P2P/IPC
+gpub014:1495257:1495328 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 14[85000] via P2P/IPC
+gpub014:1495257:1495328 [3] NCCL INFO Connected all trees
+gpub014:1495257:1495328 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub014:1495257:1495328 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub014:1495257:1495328 [3] NCCL INFO comm 0x946a450 rank 15 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub096:1645784:1645784 [0] NCCL INFO cudaDriverVersion 12010
+gpub096:1645784:1645784 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.196<0>
+gpub096:1645784:1645784 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub096:1645784:1645856 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.196<0>
+gpub096:1645784:1645856 [0] NCCL INFO Using network IB
+gpub096:1645784:1645856 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub096:1645784:1645856 [0] NCCL INFO Trees [0] 53/-1/-1->52->57 [1] 53/48/-1->52->45
+gpub096:1645784:1645856 [0] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0
+gpub096:1645784:1645856 [0] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0
+gpub096:1645784:1645856 [0] NCCL INFO Channel 00/0 : 52[7000] -> 53[46000] via P2P/IPC
+gpub096:1645784:1645856 [0] NCCL INFO Channel 01/0 : 52[7000] -> 53[46000] via P2P/IPC
+gpub096:1645784:1645856 [0] NCCL INFO Connected all rings
+gpub096:1645784:1645856 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [receive] via NET/IB/0
+gpub096:1645784:1645856 [0] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [send] via NET/IB/0
+gpub096:1645784:1645856 [0] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [receive] via NET/IB/0
+gpub096:1645784:1645856 [0] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [send] via NET/IB/0
+gpub096:1645784:1645856 [0] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [receive] via NET/IB/0
+gpub096:1645784:1645856 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [send] via NET/IB/0
+gpub096:1645784:1645856 [0] NCCL INFO Connected all trees
+gpub096:1645784:1645856 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub096:1645784:1645856 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub096:1645784:1645856 [0] NCCL INFO comm 0xcdcc14f0 rank 52 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub012:1607821:1607821 [3] NCCL INFO cudaDriverVersion 12010
+gpub012:1607821:1607821 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.112<0>
+gpub012:1607821:1607821 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub012:1607821:1607901 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.112<0>
+gpub012:1607821:1607901 [3] NCCL INFO Using network IB
+gpub012:1607821:1607901 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub012:1607821:1607901 [3] NCCL INFO Trees [0] -1/-1/-1->7->6 [1] -1/-1/-1->7->6
+gpub012:1607821:1607901 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0
+gpub012:1607821:1607901 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0
+gpub012:1607821:1607901 [3] NCCL INFO Connected all rings
+gpub012:1607821:1607901 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 6[85000] via P2P/IPC
+gpub012:1607821:1607901 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 6[85000] via P2P/IPC
+gpub012:1607821:1607901 [3] NCCL INFO Connected all trees
+gpub012:1607821:1607901 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub012:1607821:1607901 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub012:1607821:1607901 [3] NCCL INFO comm 0x516c3430 rank 7 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub041:1527385:1527385 [2] NCCL INFO cudaDriverVersion 12010
+gpub041:1527385:1527385 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.141<0>
+gpub041:1527385:1527385 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub041:1527385:1527462 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.141<0>
+gpub041:1527385:1527462 [2] NCCL INFO Using network IB
+gpub041:1527385:1527462 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub041:1527385:1527462 [2] NCCL INFO Trees [0] 35/-1/-1->34->33 [1] 35/-1/-1->34->33
+gpub041:1527385:1527462 [2] NCCL INFO Channel 00/0 : 34[85000] -> 35[c7000] via P2P/IPC
+gpub041:1527385:1527462 [2] NCCL INFO Channel 01/0 : 34[85000] -> 35[c7000] via P2P/IPC
+gpub041:1527385:1527462 [2] NCCL INFO Connected all rings
+gpub041:1527385:1527462 [2] NCCL INFO Channel 00/0 : 34[85000] -> 33[46000] via P2P/IPC
+gpub041:1527385:1527462 [2] NCCL INFO Channel 01/0 : 34[85000] -> 33[46000] via P2P/IPC
+gpub041:1527385:1527462 [2] NCCL INFO Connected all trees
+gpub041:1527385:1527462 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub041:1527385:1527462 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub041:1527385:1527462 [2] NCCL INFO comm 0x5082e9e0 rank 34 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub041:1527384:1527384 [1] NCCL INFO cudaDriverVersion 12010
+gpub041:1527384:1527384 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.141<0>
+gpub041:1527384:1527384 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub041:1527384:1527459 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.141<0>
+gpub041:1527384:1527459 [1] NCCL INFO Using network IB
+gpub041:1527384:1527459 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub041:1527384:1527459 [1] NCCL INFO Trees [0] 34/16/-1->33->32 [1] 34/-1/-1->33->32
+gpub041:1527384:1527459 [1] NCCL INFO Channel 00/0 : 33[46000] -> 34[85000] via P2P/IPC
+gpub041:1527384:1527459 [1] NCCL INFO Channel 01/0 : 33[46000] -> 34[85000] via P2P/IPC
+gpub041:1527384:1527459 [1] NCCL INFO Connected all rings
+gpub041:1527384:1527459 [1] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [receive] via NET/IB/0
+gpub041:1527384:1527459 [1] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [send] via NET/IB/0
+gpub041:1527384:1527459 [1] NCCL INFO Channel 00/0 : 33[46000] -> 32[7000] via P2P/IPC
+gpub041:1527384:1527459 [1] NCCL INFO Channel 01/0 : 33[46000] -> 32[7000] via P2P/IPC
+gpub041:1527384:1527459 [1] NCCL INFO Connected all trees
+gpub041:1527384:1527459 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub041:1527384:1527459 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub041:1527384:1527459 [1] NCCL INFO comm 0x512a04d0 rank 33 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub041:1527383:1527383 [0] NCCL INFO cudaDriverVersion 12010
+gpub041:1527383:1527383 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.141<0>
+gpub041:1527383:1527383 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub041:1527383:1527461 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.141<0>
+gpub041:1527383:1527461 [0] NCCL INFO Using network IB
+gpub041:1527383:1527461 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub041:1527383:1527461 [0] NCCL INFO Trees [0] 33/48/-1->32->0 [1] 33/-1/-1->32->36
+gpub041:1527383:1527461 [0] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0
+gpub041:1527383:1527461 [0] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0
+gpub041:1527383:1527461 [0] NCCL INFO Channel 00/0 : 32[7000] -> 33[46000] via P2P/IPC
+gpub041:1527383:1527461 [0] NCCL INFO Channel 01/0 : 32[7000] -> 33[46000] via P2P/IPC
+gpub041:1527383:1527461 [0] NCCL INFO Connected all rings
+gpub041:1527383:1527461 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [send] via NET/IB/0
+gpub041:1527383:1527461 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [send] via NET/IB/0
+gpub041:1527383:1527461 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [receive] via NET/IB/0
+gpub041:1527383:1527461 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [send] via NET/IB/0
+gpub041:1527383:1527461 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [receive] via NET/IB/0
+gpub041:1527383:1527461 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [receive] via NET/IB/0
+gpub041:1527383:1527461 [0] NCCL INFO Connected all trees
+gpub041:1527383:1527461 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub041:1527383:1527461 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub041:1527383:1527461 [0] NCCL INFO comm 0x5103b480 rank 32 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub018:1650756:1650756 [3] NCCL INFO cudaDriverVersion 12010
+gpub018:1650756:1650756 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.118<0>
+gpub018:1650756:1650756 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub018:1650756:1650832 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.118<0>
+gpub018:1650756:1650832 [3] NCCL INFO Using network IB
+gpub018:1650756:1650832 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub018:1650756:1650832 [3] NCCL INFO Trees [0] -1/-1/-1->19->18 [1] -1/-1/-1->19->18
+gpub018:1650756:1650832 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0
+gpub018:1650756:1650832 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0
+gpub018:1650756:1650832 [3] NCCL INFO Connected all rings
+gpub018:1650756:1650832 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 18[85000] via P2P/IPC
+gpub018:1650756:1650832 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 18[85000] via P2P/IPC
+gpub018:1650756:1650832 [3] NCCL INFO Connected all trees
+gpub018:1650756:1650832 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub018:1650756:1650832 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub018:1650756:1650832 [3] NCCL INFO comm 0x8c504da0 rank 19 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub084:4052710:4052710 [2] NCCL INFO cudaDriverVersion 12010
+gpub084:4052710:4052710 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.184<0>
+gpub084:4052710:4052710 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub084:4052710:4052796 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.184<0>
+gpub084:4052710:4052796 [2] NCCL INFO Using network IB
+gpub084:4052710:4052796 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub084:4052710:4052796 [2] NCCL INFO Trees [0] 47/-1/-1->46->45 [1] 47/-1/-1->46->45
+gpub084:4052710:4052796 [2] NCCL INFO Channel 00/0 : 46[85000] -> 47[c7000] via P2P/IPC
+gpub084:4052710:4052796 [2] NCCL INFO Channel 01/0 : 46[85000] -> 47[c7000] via P2P/IPC
+gpub084:4052710:4052796 [2] NCCL INFO Connected all rings
+gpub084:4052710:4052796 [2] NCCL INFO Channel 00/0 : 46[85000] -> 45[46000] via P2P/IPC
+gpub084:4052710:4052796 [2] NCCL INFO Channel 01/0 : 46[85000] -> 45[46000] via P2P/IPC
+gpub084:4052710:4052796 [2] NCCL INFO Connected all trees
+gpub084:4052710:4052796 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub084:4052710:4052796 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub084:4052710:4052796 [2] NCCL INFO comm 0x4f81fce0 rank 46 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub014:1495256:1495256 [2] NCCL INFO cudaDriverVersion 12010
+gpub014:1495256:1495256 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.114<0>
+gpub014:1495256:1495256 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub014:1495256:1495330 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.114<0>
+gpub014:1495256:1495330 [2] NCCL INFO Using network IB
+gpub014:1495256:1495330 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub014:1495256:1495330 [2] NCCL INFO Trees [0] 15/-1/-1->14->13 [1] 15/-1/-1->14->13
+gpub014:1495256:1495330 [2] NCCL INFO Channel 00/0 : 14[85000] -> 15[c7000] via P2P/IPC
+gpub014:1495256:1495330 [2] NCCL INFO Channel 01/0 : 14[85000] -> 15[c7000] via P2P/IPC
+gpub014:1495256:1495330 [2] NCCL INFO Connected all rings
+gpub014:1495256:1495330 [2] NCCL INFO Channel 00/0 : 14[85000] -> 13[46000] via P2P/IPC
+gpub014:1495256:1495330 [2] NCCL INFO Channel 01/0 : 14[85000] -> 13[46000] via P2P/IPC
+gpub018:1650755:1650755 [2] NCCL INFO cudaDriverVersion 12010
+gpub018:1650755:1650755 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.118<0>
+gpub018:1650755:1650755 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub018:1650755:1650833 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.118<0>
+gpub018:1650755:1650833 [2] NCCL INFO Using network IB
+gpub018:1650755:1650833 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub018:1650755:1650833 [2] NCCL INFO Trees [0] 19/-1/-1->18->17 [1] 19/-1/-1->18->17
+gpub018:1650755:1650833 [2] NCCL INFO Channel 00/0 : 18[85000] -> 19[c7000] via P2P/IPC
+gpub018:1650755:1650833 [2] NCCL INFO Channel 01/0 : 18[85000] -> 19[c7000] via P2P/IPC
+gpub018:1650755:1650833 [2] NCCL INFO Connected all rings
+gpub018:1650755:1650833 [2] NCCL INFO Channel 00/0 : 18[85000] -> 17[46000] via P2P/IPC
+gpub018:1650755:1650833 [2] NCCL INFO Channel 01/0 : 18[85000] -> 17[46000] via P2P/IPC
+gpub014:1495256:1495330 [2] NCCL INFO Connected all trees
+gpub014:1495256:1495330 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub014:1495256:1495330 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub014:1495256:1495330 [2] NCCL INFO comm 0x9f383a90 rank 14 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub018:1650755:1650833 [2] NCCL INFO Connected all trees
+gpub018:1650755:1650833 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub018:1650755:1650833 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub018:1650755:1650833 [2] NCCL INFO comm 0x513374c0 rank 18 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub040:2093692:2093692 [2] NCCL INFO cudaDriverVersion 12010
+gpub040:2093692:2093692 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.140<0>
+gpub040:2093692:2093692 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub040:2093692:2093775 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.140<0>
+gpub040:2093692:2093775 [2] NCCL INFO Using network IB
+gpub040:2093692:2093775 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub040:2093692:2093775 [2] NCCL INFO Trees [0] 31/-1/-1->30->29 [1] 31/-1/-1->30->29
+gpub040:2093692:2093775 [2] NCCL INFO Channel 00/0 : 30[85000] -> 31[c7000] via P2P/IPC
+gpub040:2093692:2093775 [2] NCCL INFO Channel 01/0 : 30[85000] -> 31[c7000] via P2P/IPC
+gpub040:2093692:2093775 [2] NCCL INFO Connected all rings
+gpub040:2093692:2093775 [2] NCCL INFO Channel 00/0 : 30[85000] -> 29[46000] via P2P/IPC
+gpub040:2093692:2093775 [2] NCCL INFO Channel 01/0 : 30[85000] -> 29[46000] via P2P/IPC
+gpub084:4052711:4052711 [3] NCCL INFO cudaDriverVersion 12010
+gpub084:4052711:4052711 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.184<0>
+gpub084:4052711:4052711 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub084:4052711:4052795 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.184<0>
+gpub084:4052711:4052795 [3] NCCL INFO Using network IB
+gpub084:4052711:4052795 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub084:4052711:4052795 [3] NCCL INFO Trees [0] -1/-1/-1->47->46 [1] -1/-1/-1->47->46
+gpub084:4052711:4052795 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0
+gpub084:4052711:4052795 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0
+gpub084:4052711:4052795 [3] NCCL INFO Connected all rings
+gpub084:4052711:4052795 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 46[85000] via P2P/IPC
+gpub084:4052711:4052795 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 46[85000] via P2P/IPC
+gpub040:2093692:2093775 [2] NCCL INFO Connected all trees
+gpub040:2093692:2093775 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub040:2093692:2093775 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub040:2093692:2093775 [2] NCCL INFO comm 0x514bd130 rank 30 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub084:4052711:4052795 [3] NCCL INFO Connected all trees
+gpub084:4052711:4052795 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub084:4052711:4052795 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub084:4052711:4052795 [3] NCCL INFO comm 0xa5710b50 rank 47 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub072:1805519:1805519 [0] NCCL INFO cudaDriverVersion 12010
+gpub072:1805519:1805519 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.172<0>
+gpub072:1805519:1805519 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub072:1805519:1805602 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.172<0>
+gpub072:1805519:1805602 [0] NCCL INFO Using network IB
+gpub072:1805519:1805602 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub072:1805519:1805602 [0] NCCL INFO Trees [0] 41/44/-1->40->49 [1] 41/-1/-1->40->37
+gpub072:1805519:1805602 [0] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0
+gpub072:1805519:1805602 [0] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0
+gpub072:1805519:1805602 [0] NCCL INFO Channel 00/0 : 40[7000] -> 41[46000] via P2P/IPC
+gpub072:1805519:1805602 [0] NCCL INFO Channel 01/0 : 40[7000] -> 41[46000] via P2P/IPC
+gpub072:1805519:1805602 [0] NCCL INFO Connected all rings
+gpub012:1607819:1607819 [1] NCCL INFO cudaDriverVersion 12010
+gpub012:1607819:1607819 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.112<0>
+gpub012:1607819:1607819 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub012:1607819:1607899 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.112<0>
+gpub012:1607819:1607899 [1] NCCL INFO Using network IB
+gpub012:1607819:1607899 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub012:1607819:1607899 [1] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 6/8/-1->5->4
+gpub012:1607819:1607899 [1] NCCL INFO Channel 00/0 : 5[46000] -> 6[85000] via P2P/IPC
+gpub012:1607819:1607899 [1] NCCL INFO Channel 01/0 : 5[46000] -> 6[85000] via P2P/IPC
+gpub012:1607819:1607899 [1] NCCL INFO Connected all rings
+gpub012:1607819:1607899 [1] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [send] via NET/IB/0
+gpub012:1607819:1607899 [1] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [receive] via NET/IB/0
+gpub072:1805519:1805602 [0] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [receive] via NET/IB/0
+gpub072:1805519:1805602 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [send] via NET/IB/0
+gpub072:1805519:1805602 [0] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [send] via NET/IB/0
+gpub072:1805519:1805602 [0] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [receive] via NET/IB/0
+gpub072:1805519:1805602 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [receive] via NET/IB/0
+gpub072:1805519:1805602 [0] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [send] via NET/IB/0
+gpub072:1805519:1805602 [0] NCCL INFO Connected all trees
+gpub072:1805519:1805602 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub072:1805519:1805602 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub072:1805519:1805602 [0] NCCL INFO comm 0x4fb13ad0 rank 40 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub012:1607819:1607899 [1] NCCL INFO Channel 00/0 : 5[46000] -> 4[7000] via P2P/IPC
+gpub012:1607819:1607899 [1] NCCL INFO Channel 01/0 : 5[46000] -> 4[7000] via P2P/IPC
+gpub012:1607819:1607899 [1] NCCL INFO Connected all trees
+gpub012:1607819:1607899 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub012:1607819:1607899 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub012:1607819:1607899 [1] NCCL INFO comm 0xa4ee840 rank 5 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub039:2093177:2093177 [2] NCCL INFO cudaDriverVersion 12010
+gpub039:2093177:2093177 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.139<0>
+gpub039:2093177:2093177 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub039:2093177:2093242 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.139<0>
+gpub039:2093177:2093242 [2] NCCL INFO Using network IB
+gpub039:2093177:2093242 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub039:2093177:2093242 [2] NCCL INFO Trees [0] 27/-1/-1->26->25 [1] 27/-1/-1->26->25
+gpub039:2093177:2093242 [2] NCCL INFO Channel 00/0 : 26[85000] -> 27[c7000] via P2P/IPC
+gpub039:2093177:2093242 [2] NCCL INFO Channel 01/0 : 26[85000] -> 27[c7000] via P2P/IPC
+gpub039:2093177:2093242 [2] NCCL INFO Connected all rings
+gpub039:2093177:2093242 [2] NCCL INFO Channel 00/0 : 26[85000] -> 25[46000] via P2P/IPC
+gpub039:2093177:2093242 [2] NCCL INFO Channel 01/0 : 26[85000] -> 25[46000] via P2P/IPC
+gpub039:2093177:2093242 [2] NCCL INFO Connected all trees
+gpub039:2093177:2093242 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub039:2093177:2093242 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub039:2093177:2093242 [2] NCCL INFO comm 0xa965b10 rank 26 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub097:1705871:1705871 [3] NCCL INFO cudaDriverVersion 12010
+gpub097:1705871:1705871 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.197<0>
+gpub097:1705871:1705871 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub097:1705871:1705957 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.197<0>
+gpub097:1705871:1705957 [3] NCCL INFO Using network IB
+gpub097:1705871:1705957 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub097:1705871:1705957 [3] NCCL INFO Trees [0] -1/-1/-1->59->58 [1] -1/-1/-1->59->58
+gpub097:1705871:1705957 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0
+gpub097:1705871:1705957 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0
+gpub097:1705871:1705957 [3] NCCL INFO Connected all rings
+gpub097:1705871:1705957 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 58[85000] via P2P/IPC
+gpub097:1705871:1705957 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 58[85000] via P2P/IPC
+gpub097:1705871:1705957 [3] NCCL INFO Connected all trees
+gpub097:1705871:1705957 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub097:1705871:1705957 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub097:1705871:1705957 [3] NCCL INFO comm 0x94d2db0 rank 59 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub097:1705870:1705870 [2] NCCL INFO cudaDriverVersion 12010
+gpub097:1705870:1705870 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.197<0>
+gpub097:1705870:1705870 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub097:1705870:1705956 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.197<0>
+gpub097:1705870:1705956 [2] NCCL INFO Using network IB
+gpub097:1705870:1705956 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub097:1705870:1705956 [2] NCCL INFO Trees [0] 59/-1/-1->58->57 [1] 59/-1/-1->58->57
+gpub097:1705870:1705956 [2] NCCL INFO Channel 00/0 : 58[85000] -> 59[c7000] via P2P/IPC
+gpub097:1705870:1705956 [2] NCCL INFO Channel 01/0 : 58[85000] -> 59[c7000] via P2P/IPC
+gpub097:1705870:1705956 [2] NCCL INFO Connected all rings
+gpub097:1705870:1705956 [2] NCCL INFO Channel 00/0 : 58[85000] -> 57[46000] via P2P/IPC
+gpub097:1705870:1705956 [2] NCCL INFO Channel 01/0 : 58[85000] -> 57[46000] via P2P/IPC
+gpub039:2093175:2093175 [0] NCCL INFO cudaDriverVersion 12010
+gpub039:2093175:2093175 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.139<0>
+gpub039:2093175:2093175 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub039:2093175:2093244 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.139<0>
+gpub039:2093175:2093244 [0] NCCL INFO Using network IB
+gpub039:2093175:2093244 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub039:2093175:2093244 [0] NCCL INFO Trees [0] 25/28/-1->24->16 [1] 25/-1/-1->24->21
+gpub039:2093175:2093244 [0] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0
+gpub039:2093175:2093244 [0] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0
+gpub039:2093175:2093244 [0] NCCL INFO Channel 00/0 : 24[7000] -> 25[46000] via P2P/IPC
+gpub039:2093175:2093244 [0] NCCL INFO Channel 01/0 : 24[7000] -> 25[46000] via P2P/IPC
+gpub039:2093175:2093244 [0] NCCL INFO Connected all rings
+gpub097:1705870:1705956 [2] NCCL INFO Connected all trees
+gpub097:1705870:1705956 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub097:1705870:1705956 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub097:1705870:1705956 [2] NCCL INFO comm 0x50f117a0 rank 58 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub039:2093175:2093244 [0] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [receive] via NET/IB/0
+gpub039:2093175:2093244 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [send] via NET/IB/0
+gpub039:2093175:2093244 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [receive] via NET/IB/0
+gpub039:2093175:2093244 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [send] via NET/IB/0
+gpub039:2093175:2093244 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [receive] via NET/IB/0
+gpub039:2093175:2093244 [0] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [send] via NET/IB/0
+gpub039:2093175:2093244 [0] NCCL INFO Connected all trees
+gpub039:2093175:2093244 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub039:2093175:2093244 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub039:2093175:2093244 [0] NCCL INFO comm 0xa2cab60 rank 24 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub041:1527386:1527386 [3] NCCL INFO cudaDriverVersion 12010
+gpub041:1527386:1527386 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.141<0>
+gpub041:1527386:1527386 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub041:1527386:1527460 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.141<0>
+gpub041:1527386:1527460 [3] NCCL INFO Using network IB
+gpub041:1527386:1527460 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub041:1527386:1527460 [3] NCCL INFO Trees [0] -1/-1/-1->35->34 [1] -1/-1/-1->35->34
+gpub041:1527386:1527460 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0
+gpub041:1527386:1527460 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0
+gpub041:1527386:1527460 [3] NCCL INFO Connected all rings
+gpub041:1527386:1527460 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 34[85000] via P2P/IPC
+gpub041:1527386:1527460 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 34[85000] via P2P/IPC
+gpub030:2531969:2531969 [0] NCCL INFO cudaDriverVersion 12010
+gpub030:2531969:2531969 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.130<0>
+gpub030:2531969:2531969 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub030:2531969:2532049 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.130<0>
+gpub030:2531969:2532049 [0] NCCL INFO Using network IB
+gpub030:2531969:2532049 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub030:2531969:2532049 [0] NCCL INFO Trees [0] 21/-1/-1->20->25 [1] 21/16/-1->20->13
+gpub030:2531969:2532049 [0] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0
+gpub030:2531969:2532049 [0] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0
+gpub030:2531969:2532049 [0] NCCL INFO Channel 00/0 : 20[7000] -> 21[46000] via P2P/IPC
+gpub030:2531969:2532049 [0] NCCL INFO Channel 01/0 : 20[7000] -> 21[46000] via P2P/IPC
+gpub030:2531969:2532049 [0] NCCL INFO Connected all rings
+gpub041:1527386:1527460 [3] NCCL INFO Connected all trees
+gpub041:1527386:1527460 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub041:1527386:1527460 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub041:1527386:1527460 [3] NCCL INFO comm 0x4f979490 rank 35 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub012:1607818:1607818 [0] NCCL INFO cudaDriverVersion 12010
+gpub012:1607818:1607818 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.112<0>
+gpub012:1607818:1607818 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub012:1607818:1607902 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.112<0>
+gpub012:1607818:1607902 [0] NCCL INFO Using network IB
+gpub012:1607818:1607902 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub012:1607818:1607902 [0] NCCL INFO Trees [0] 5/-1/-1->4->9 [1] 5/0/-1->4->12
+gpub012:1607818:1607902 [0] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0
+gpub012:1607818:1607902 [0] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0
+gpub012:1607818:1607902 [0] NCCL INFO Channel 00/0 : 4[7000] -> 5[46000] via P2P/IPC
+gpub012:1607818:1607902 [0] NCCL INFO Channel 01/0 : 4[7000] -> 5[46000] via P2P/IPC
+gpub012:1607818:1607902 [0] NCCL INFO Connected all rings
+gpub030:2531969:2532049 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [receive] via NET/IB/0
+gpub030:2531969:2532049 [0] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [send] via NET/IB/0
+gpub030:2531969:2532049 [0] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [receive] via NET/IB/0
+gpub030:2531969:2532049 [0] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [send] via NET/IB/0
+gpub030:2531969:2532049 [0] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [receive] via NET/IB/0
+gpub030:2531969:2532049 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [send] via NET/IB/0
+gpub030:2531969:2532049 [0] NCCL INFO Connected all trees
+gpub030:2531969:2532049 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub030:2531969:2532049 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub030:2531969:2532049 [0] NCCL INFO comm 0xb4f6de0 rank 20 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub012:1607818:1607902 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [receive] via NET/IB/0
+gpub012:1607818:1607902 [0] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [send] via NET/IB/0
+gpub012:1607818:1607902 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [send] via NET/IB/0
+gpub012:1607818:1607902 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [receive] via NET/IB/0
+gpub012:1607818:1607902 [0] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [receive] via NET/IB/0
+gpub012:1607818:1607902 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [send] via NET/IB/0
+gpub012:1607818:1607902 [0] NCCL INFO Connected all trees
+gpub012:1607818:1607902 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub012:1607818:1607902 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub012:1607818:1607902 [0] NCCL INFO comm 0xa8f3bc80 rank 4 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub039:2093178:2093178 [3] NCCL INFO cudaDriverVersion 12010
+gpub039:2093178:2093178 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.139<0>
+gpub039:2093178:2093178 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub039:2093178:2093243 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.139<0>
+gpub039:2093178:2093243 [3] NCCL INFO Using network IB
+gpub039:2093178:2093243 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub039:2093178:2093243 [3] NCCL INFO Trees [0] -1/-1/-1->27->26 [1] -1/-1/-1->27->26
+gpub039:2093178:2093243 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0
+gpub039:2093178:2093243 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0
+gpub039:2093178:2093243 [3] NCCL INFO Connected all rings
+gpub039:2093178:2093243 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 26[85000] via P2P/IPC
+gpub039:2093178:2093243 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 26[85000] via P2P/IPC
+gpub039:2093178:2093243 [3] NCCL INFO Connected all trees
+gpub039:2093178:2093243 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub039:2093178:2093243 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub039:2093178:2093243 [3] NCCL INFO comm 0x4fc75960 rank 27 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub030:2531970:2531970 [1] NCCL INFO cudaDriverVersion 12010
+gpub030:2531970:2531970 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.130<0>
+gpub030:2531970:2531970 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub030:2531970:2532052 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.130<0>
+gpub030:2531970:2532052 [1] NCCL INFO Using network IB
+gpub030:2531970:2532052 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub030:2531970:2532052 [1] NCCL INFO Trees [0] 22/-1/-1->21->20 [1] 22/24/-1->21->20
+gpub030:2531970:2532052 [1] NCCL INFO Channel 00/0 : 21[46000] -> 22[85000] via P2P/IPC
+gpub030:2531970:2532052 [1] NCCL INFO Channel 01/0 : 21[46000] -> 22[85000] via P2P/IPC
+gpub030:2531970:2532052 [1] NCCL INFO Connected all rings
+gpub030:2531970:2532052 [1] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [send] via NET/IB/0
+gpub030:2531970:2532052 [1] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [receive] via NET/IB/0
+gpub030:2531970:2532052 [1] NCCL INFO Channel 00/0 : 21[46000] -> 20[7000] via P2P/IPC
+gpub030:2531970:2532052 [1] NCCL INFO Channel 01/0 : 21[46000] -> 20[7000] via P2P/IPC
+gpub030:2531970:2532052 [1] NCCL INFO Connected all trees
+gpub030:2531970:2532052 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub030:2531970:2532052 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub030:2531970:2532052 [1] NCCL INFO comm 0x8ebc3340 rank 21 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub012:1607820:1607820 [2] NCCL INFO cudaDriverVersion 12010
+gpub012:1607820:1607820 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.112<0>
+gpub012:1607820:1607820 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub012:1607820:1607900 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.112<0>
+gpub012:1607820:1607900 [2] NCCL INFO Using network IB
+gpub012:1607820:1607900 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub012:1607820:1607900 [2] NCCL INFO Trees [0] 7/-1/-1->6->5 [1] 7/-1/-1->6->5
+gpub012:1607820:1607900 [2] NCCL INFO Channel 00/0 : 6[85000] -> 7[c7000] via P2P/IPC
+gpub012:1607820:1607900 [2] NCCL INFO Channel 01/0 : 6[85000] -> 7[c7000] via P2P/IPC
+gpub012:1607820:1607900 [2] NCCL INFO Connected all rings
+gpub012:1607820:1607900 [2] NCCL INFO Channel 00/0 : 6[85000] -> 5[46000] via P2P/IPC
+gpub012:1607820:1607900 [2] NCCL INFO Channel 01/0 : 6[85000] -> 5[46000] via P2P/IPC
+gpub072:1805522:1805522 [3] NCCL INFO cudaDriverVersion 12010
+gpub072:1805522:1805522 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.172<0>
+gpub072:1805522:1805522 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub072:1805522:1805603 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.172<0>
+gpub072:1805522:1805603 [3] NCCL INFO Using network IB
+gpub072:1805522:1805603 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub072:1805522:1805603 [3] NCCL INFO Trees [0] -1/-1/-1->43->42 [1] -1/-1/-1->43->42
+gpub072:1805522:1805603 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0
+gpub072:1805522:1805603 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0
+gpub072:1805522:1805603 [3] NCCL INFO Connected all rings
+gpub072:1805522:1805603 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 42[85000] via P2P/IPC
+gpub072:1805522:1805603 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 42[85000] via P2P/IPC
+gpub039:2093176:2093176 [1] NCCL INFO cudaDriverVersion 12010
+gpub039:2093176:2093176 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.139<0>
+gpub039:2093176:2093176 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub039:2093176:2093245 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.139<0>
+gpub039:2093176:2093245 [1] NCCL INFO Using network IB
+gpub039:2093176:2093245 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub039:2093176:2093245 [1] NCCL INFO Trees [0] 26/20/-1->25->24 [1] 26/-1/-1->25->24
+gpub039:2093176:2093245 [1] NCCL INFO Channel 00/0 : 25[46000] -> 26[85000] via P2P/IPC
+gpub039:2093176:2093245 [1] NCCL INFO Channel 01/0 : 25[46000] -> 26[85000] via P2P/IPC
+gpub039:2093176:2093245 [1] NCCL INFO Connected all rings
+gpub039:2093176:2093245 [1] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [receive] via NET/IB/0
+gpub039:2093176:2093245 [1] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [send] via NET/IB/0
+gpub012:1607820:1607900 [2] NCCL INFO Connected all trees
+gpub012:1607820:1607900 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub012:1607820:1607900 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub012:1607820:1607900 [2] NCCL INFO comm 0x503f1430 rank 6 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub072:1805522:1805603 [3] NCCL INFO Connected all trees
+gpub072:1805522:1805603 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub072:1805522:1805603 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub072:1805522:1805603 [3] NCCL INFO comm 0x50740450 rank 43 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub039:2093176:2093245 [1] NCCL INFO Channel 00/0 : 25[46000] -> 24[7000] via P2P/IPC
+gpub039:2093176:2093245 [1] NCCL INFO Channel 01/0 : 25[46000] -> 24[7000] via P2P/IPC
+gpub039:2093176:2093245 [1] NCCL INFO Connected all trees
+gpub039:2093176:2093245 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub039:2093176:2093245 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub039:2093176:2093245 [1] NCCL INFO comm 0xbcbaabd0 rank 25 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub030:2531972:2531972 [3] NCCL INFO cudaDriverVersion 12010
+gpub030:2531972:2531972 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.130<0>
+gpub030:2531972:2531972 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub030:2531972:2532050 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.130<0>
+gpub030:2531972:2532050 [3] NCCL INFO Using network IB
+gpub030:2531972:2532050 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub030:2531972:2532050 [3] NCCL INFO Trees [0] -1/-1/-1->23->22 [1] -1/-1/-1->23->22
+gpub030:2531972:2532050 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0
+gpub030:2531972:2532050 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0
+gpub030:2531972:2532050 [3] NCCL INFO Connected all rings
+gpub030:2531972:2532050 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 22[85000] via P2P/IPC
+gpub030:2531972:2532050 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 22[85000] via P2P/IPC
+gpub030:2531972:2532050 [3] NCCL INFO Connected all trees
+gpub030:2531972:2532050 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub030:2531972:2532050 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub030:2531972:2532050 [3] NCCL INFO comm 0xa2cf1d0 rank 23 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub018:1650754:1650754 [1] NCCL INFO cudaDriverVersion 12010
+gpub018:1650754:1650754 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.118<0>
+gpub018:1650754:1650754 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub018:1650754:1650831 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.118<0>
+gpub018:1650754:1650831 [1] NCCL INFO Using network IB
+gpub018:1650754:1650831 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub018:1650754:1650831 [1] NCCL INFO Trees [0] 18/8/-1->17->16 [1] 18/-1/-1->17->16
+gpub018:1650754:1650831 [1] NCCL INFO Channel 00/0 : 17[46000] -> 18[85000] via P2P/IPC
+gpub018:1650754:1650831 [1] NCCL INFO Channel 01/0 : 17[46000] -> 18[85000] via P2P/IPC
+gpub018:1650754:1650831 [1] NCCL INFO Connected all rings
+gpub018:1650754:1650831 [1] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [receive] via NET/IB/0
+gpub018:1650754:1650831 [1] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [send] via NET/IB/0
+gpub018:1650754:1650831 [1] NCCL INFO Channel 00/0 : 17[46000] -> 16[7000] via P2P/IPC
+gpub018:1650754:1650831 [1] NCCL INFO Channel 01/0 : 17[46000] -> 16[7000] via P2P/IPC
+gpub018:1650754:1650831 [1] NCCL INFO Connected all trees
+gpub018:1650754:1650831 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub018:1650754:1650831 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub018:1650754:1650831 [1] NCCL INFO comm 0xa938d420 rank 17 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub096:1645787:1645787 [3] NCCL INFO cudaDriverVersion 12010
+gpub096:1645787:1645787 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.196<0>
+gpub096:1645787:1645787 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub096:1645787:1645858 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.196<0>
+gpub096:1645787:1645858 [3] NCCL INFO Using network IB
+gpub096:1645787:1645858 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub096:1645787:1645858 [3] NCCL INFO Trees [0] -1/-1/-1->55->54 [1] -1/-1/-1->55->54
+gpub096:1645787:1645858 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0
+gpub096:1645787:1645858 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0
+gpub096:1645787:1645858 [3] NCCL INFO Connected all rings
+gpub096:1645787:1645858 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 54[85000] via P2P/IPC
+gpub096:1645787:1645858 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 54[85000] via P2P/IPC
+gpub096:1645787:1645858 [3] NCCL INFO Connected all trees
+gpub096:1645787:1645858 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub096:1645787:1645858 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub096:1645787:1645858 [3] NCCL INFO comm 0xb78b6390 rank 55 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub018:1650753:1650753 [0] NCCL INFO cudaDriverVersion 12010
+gpub018:1650753:1650753 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.118<0>
+gpub018:1650753:1650753 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub018:1650753:1650834 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.118<0>
+gpub018:1650753:1650834 [0] NCCL INFO Using network IB
+gpub018:1650753:1650834 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub018:1650753:1650834 [0] NCCL INFO Trees [0] 17/24/-1->16->33 [1] 17/-1/-1->16->20
+gpub018:1650753:1650834 [0] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0
+gpub018:1650753:1650834 [0] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0
+gpub018:1650753:1650834 [0] NCCL INFO Channel 00/0 : 16[7000] -> 17[46000] via P2P/IPC
+gpub018:1650753:1650834 [0] NCCL INFO Channel 01/0 : 16[7000] -> 17[46000] via P2P/IPC
+gpub018:1650753:1650834 [0] NCCL INFO Connected all rings
+gpub018:1650753:1650834 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [send] via NET/IB/0
+gpub018:1650753:1650834 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [send] via NET/IB/0
+gpub018:1650753:1650834 [0] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [send] via NET/IB/0
+gpub018:1650753:1650834 [0] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [receive] via NET/IB/0
+gpub018:1650753:1650834 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [receive] via NET/IB/0
+gpub018:1650753:1650834 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [receive] via NET/IB/0
+gpub018:1650753:1650834 [0] NCCL INFO Connected all trees
+gpub018:1650753:1650834 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub018:1650753:1650834 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub018:1650753:1650834 [0] NCCL INFO comm 0x4f7a1b90 rank 16 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub097:1705868:1705868 [0] NCCL INFO cudaDriverVersion 12010
+gpub097:1705868:1705868 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.197<0>
+gpub097:1705868:1705868 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub097:1705868:1705958 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.197<0>
+gpub097:1705868:1705958 [0] NCCL INFO Using network IB
+gpub097:1705868:1705958 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub097:1705868:1705958 [0] NCCL INFO Trees [0] 57/60/-1->56->48 [1] 57/-1/-1->56->53
+gpub097:1705868:1705958 [0] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0
+gpub097:1705868:1705958 [0] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0
+gpub097:1705868:1705958 [0] NCCL INFO Channel 00/0 : 56[7000] -> 57[46000] via P2P/IPC
+gpub097:1705868:1705958 [0] NCCL INFO Channel 01/0 : 56[7000] -> 57[46000] via P2P/IPC
+gpub097:1705868:1705958 [0] NCCL INFO Connected all rings
+gpub097:1705868:1705958 [0] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [receive] via NET/IB/0
+gpub097:1705868:1705958 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [send] via NET/IB/0
+gpub097:1705868:1705958 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [receive] via NET/IB/0
+gpub097:1705868:1705958 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [send] via NET/IB/0
+gpub097:1705868:1705958 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [receive] via NET/IB/0
+gpub097:1705868:1705958 [0] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [send] via NET/IB/0
+gpub097:1705868:1705958 [0] NCCL INFO Connected all trees
+gpub097:1705868:1705958 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub097:1705868:1705958 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub097:1705868:1705958 [0] NCCL INFO comm 0x4f565ad0 rank 56 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub030:2531971:2531971 [2] NCCL INFO cudaDriverVersion 12010
+gpub030:2531971:2531971 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.130<0>
+gpub030:2531971:2531971 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub030:2531971:2532051 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.130<0>
+gpub030:2531971:2532051 [2] NCCL INFO Using network IB
+gpub030:2531971:2532051 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub030:2531971:2532051 [2] NCCL INFO Trees [0] 23/-1/-1->22->21 [1] 23/-1/-1->22->21
+gpub030:2531971:2532051 [2] NCCL INFO Channel 00/0 : 22[85000] -> 23[c7000] via P2P/IPC
+gpub030:2531971:2532051 [2] NCCL INFO Channel 01/0 : 22[85000] -> 23[c7000] via P2P/IPC
+gpub030:2531971:2532051 [2] NCCL INFO Connected all rings
+gpub030:2531971:2532051 [2] NCCL INFO Channel 00/0 : 22[85000] -> 21[46000] via P2P/IPC
+gpub030:2531971:2532051 [2] NCCL INFO Channel 01/0 : 22[85000] -> 21[46000] via P2P/IPC
+gpub030:2531971:2532051 [2] NCCL INFO Connected all trees
+gpub030:2531971:2532051 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub030:2531971:2532051 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub030:2531971:2532051 [2] NCCL INFO comm 0x8dd18cd0 rank 22 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub084:4052708:4052708 [0] NCCL INFO cudaDriverVersion 12010
+gpub084:4052708:4052708 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.184<0>
+gpub084:4052708:4052708 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub084:4052708:4052794 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.184<0>
+gpub084:4052708:4052794 [0] NCCL INFO Using network IB
+gpub084:4052708:4052794 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub084:4052708:4052794 [0] NCCL INFO Trees [0] 45/-1/-1->44->40 [1] 45/36/-1->44->29
+gpub084:4052708:4052794 [0] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0
+gpub084:4052708:4052794 [0] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0
+gpub084:4052708:4052794 [0] NCCL INFO Channel 00/0 : 44[7000] -> 45[46000] via P2P/IPC
+gpub084:4052708:4052794 [0] NCCL INFO Channel 01/0 : 44[7000] -> 45[46000] via P2P/IPC
+gpub084:4052708:4052794 [0] NCCL INFO Connected all rings
+gpub084:4052708:4052794 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [receive] via NET/IB/0
+gpub084:4052708:4052794 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [receive] via NET/IB/0
+gpub084:4052708:4052794 [0] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [receive] via NET/IB/0
+gpub084:4052708:4052794 [0] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [send] via NET/IB/0
+gpub084:4052708:4052794 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [send] via NET/IB/0
+gpub084:4052708:4052794 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [send] via NET/IB/0
+gpub084:4052708:4052794 [0] NCCL INFO Connected all trees
+gpub084:4052708:4052794 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub084:4052708:4052794 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub084:4052708:4052794 [0] NCCL INFO comm 0xb576c9d0 rank 44 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub096:1645786:1645786 [2] NCCL INFO cudaDriverVersion 12010
+gpub096:1645786:1645786 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.196<0>
+gpub096:1645786:1645786 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub096:1645786:1645857 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.196<0>
+gpub096:1645786:1645857 [2] NCCL INFO Using network IB
+gpub096:1645786:1645857 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub096:1645786:1645857 [2] NCCL INFO Trees [0] 55/-1/-1->54->53 [1] 55/-1/-1->54->53
+gpub096:1645786:1645857 [2] NCCL INFO Channel 00/0 : 54[85000] -> 55[c7000] via P2P/IPC
+gpub096:1645786:1645857 [2] NCCL INFO Channel 01/0 : 54[85000] -> 55[c7000] via P2P/IPC
+gpub096:1645786:1645857 [2] NCCL INFO Connected all rings
+gpub096:1645786:1645857 [2] NCCL INFO Channel 00/0 : 54[85000] -> 53[46000] via P2P/IPC
+gpub096:1645786:1645857 [2] NCCL INFO Channel 01/0 : 54[85000] -> 53[46000] via P2P/IPC
+gpub096:1645786:1645857 [2] NCCL INFO Connected all trees
+gpub096:1645786:1645857 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub096:1645786:1645857 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub096:1645786:1645857 [2] NCCL INFO comm 0x4fe9cb90 rank 54 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub097:1705869:1705869 [1] NCCL INFO cudaDriverVersion 12010
+gpub097:1705869:1705869 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.197<0>
+gpub097:1705869:1705869 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub097:1705869:1705955 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.197<0>
+gpub097:1705869:1705955 [1] NCCL INFO Using network IB
+gpub097:1705869:1705955 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub097:1705869:1705955 [1] NCCL INFO Trees [0] 58/52/-1->57->56 [1] 58/-1/-1->57->56
+gpub097:1705869:1705955 [1] NCCL INFO Channel 00/0 : 57[46000] -> 58[85000] via P2P/IPC
+gpub097:1705869:1705955 [1] NCCL INFO Channel 01/0 : 57[46000] -> 58[85000] via P2P/IPC
+gpub097:1705869:1705955 [1] NCCL INFO Connected all rings
+gpub097:1705869:1705955 [1] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [receive] via NET/IB/0
+gpub097:1705869:1705955 [1] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [send] via NET/IB/0
+gpub097:1705869:1705955 [1] NCCL INFO Channel 00/0 : 57[46000] -> 56[7000] via P2P/IPC
+gpub097:1705869:1705955 [1] NCCL INFO Channel 01/0 : 57[46000] -> 56[7000] via P2P/IPC
+gpub097:1705869:1705955 [1] NCCL INFO Connected all trees
+gpub097:1705869:1705955 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub097:1705869:1705955 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub097:1705869:1705955 [1] NCCL INFO comm 0x8e89510 rank 57 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub040:2093691:2093691 [1] NCCL INFO cudaDriverVersion 12010
+gpub040:2093691:2093691 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.140<0>
+gpub040:2093691:2093691 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub040:2093691:2093774 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.140<0>
+gpub040:2093691:2093774 [1] NCCL INFO Using network IB
+gpub040:2093691:2093774 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub040:2093691:2093774 [1] NCCL INFO Trees [0] 30/-1/-1->29->28 [1] 30/44/-1->29->28
+gpub040:2093691:2093774 [1] NCCL INFO Channel 00/0 : 29[46000] -> 30[85000] via P2P/IPC
+gpub040:2093691:2093774 [1] NCCL INFO Channel 01/0 : 29[46000] -> 30[85000] via P2P/IPC
+gpub040:2093691:2093774 [1] NCCL INFO Connected all rings
+gpub040:2093691:2093774 [1] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [send] via NET/IB/0
+gpub040:2093691:2093774 [1] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [receive] via NET/IB/0
+gpub040:2093691:2093774 [1] NCCL INFO Channel 00/0 : 29[46000] -> 28[7000] via P2P/IPC
+gpub040:2093691:2093774 [1] NCCL INFO Channel 01/0 : 29[46000] -> 28[7000] via P2P/IPC
+gpub040:2093691:2093774 [1] NCCL INFO Connected all trees
+gpub040:2093691:2093774 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub040:2093691:2093774 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub040:2093691:2093774 [1] NCCL INFO comm 0xb9336880 rank 29 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub096:1645785:1645785 [1] NCCL INFO cudaDriverVersion 12010
+gpub096:1645785:1645785 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.196<0>
+gpub096:1645785:1645785 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub096:1645785:1645855 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.196<0>
+gpub096:1645785:1645855 [1] NCCL INFO Using network IB
+gpub096:1645785:1645855 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub096:1645785:1645855 [1] NCCL INFO Trees [0] 54/-1/-1->53->52 [1] 54/56/-1->53->52
+gpub096:1645785:1645855 [1] NCCL INFO Channel 00/0 : 53[46000] -> 54[85000] via P2P/IPC
+gpub096:1645785:1645855 [1] NCCL INFO Channel 01/0 : 53[46000] -> 54[85000] via P2P/IPC
+gpub096:1645785:1645855 [1] NCCL INFO Connected all rings
+gpub096:1645785:1645855 [1] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [send] via NET/IB/0
+gpub096:1645785:1645855 [1] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [receive] via NET/IB/0
+gpub096:1645785:1645855 [1] NCCL INFO Channel 00/0 : 53[46000] -> 52[7000] via P2P/IPC
+gpub096:1645785:1645855 [1] NCCL INFO Channel 01/0 : 53[46000] -> 52[7000] via P2P/IPC
+gpub096:1645785:1645855 [1] NCCL INFO Connected all trees
+gpub096:1645785:1645855 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub096:1645785:1645855 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub096:1645785:1645855 [1] NCCL INFO comm 0x50f7e840 rank 53 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub014:1495254:1495254 [0] NCCL INFO cudaDriverVersion 12010
+gpub014:1495254:1495254 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.114<0>
+gpub014:1495254:1495254 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub014:1495254:1495329 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.114<0>
+gpub014:1495254:1495329 [0] NCCL INFO Using network IB
+gpub014:1495254:1495329 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub014:1495254:1495329 [0] NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->28
+gpub014:1495254:1495329 [0] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0
+gpub014:1495254:1495329 [0] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0
+gpub014:1495254:1495329 [0] NCCL INFO Channel 00/0 : 12[7000] -> 13[46000] via P2P/IPC
+gpub014:1495254:1495329 [0] NCCL INFO Channel 01/0 : 12[7000] -> 13[46000] via P2P/IPC
+gpub014:1495254:1495329 [0] NCCL INFO Connected all rings
+gpub014:1495254:1495329 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [receive] via NET/IB/0
+gpub014:1495254:1495329 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [receive] via NET/IB/0
+gpub014:1495254:1495329 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [send] via NET/IB/0
+gpub014:1495254:1495329 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [receive] via NET/IB/0
+gpub014:1495254:1495329 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [send] via NET/IB/0
+gpub014:1495254:1495329 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [send] via NET/IB/0
+gpub014:1495254:1495329 [0] NCCL INFO Connected all trees
+gpub014:1495254:1495329 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub014:1495254:1495329 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub014:1495254:1495329 [0] NCCL INFO comm 0x50fe0a80 rank 12 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub040:2093693:2093693 [3] NCCL INFO cudaDriverVersion 12010
+gpub040:2093693:2093693 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.140<0>
+gpub040:2093693:2093693 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub040:2093693:2093773 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.140<0>
+gpub040:2093693:2093773 [3] NCCL INFO Using network IB
+gpub040:2093693:2093773 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub040:2093693:2093773
[3] NCCL INFO Trees [0] -1/-1/-1->31->30 [1] -1/-1/-1->31->30 +gpub040:2093693:2093773 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0 +gpub040:2093693:2093773 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0 +gpub040:2093693:2093773 [3] NCCL INFO Connected all rings +gpub040:2093693:2093773 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 30[85000] via P2P/IPC +gpub040:2093693:2093773 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 30[85000] via P2P/IPC +gpub040:2093693:2093773 [3] NCCL INFO Connected all trees +gpub040:2093693:2093773 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub040:2093693:2093773 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub040:2093693:2093773 [3] NCCL INFO comm 0xbd6eac10 rank 31 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
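The NCCL INFO lines above trace communicator setup across all 64 ranks: bootstrap over eth1, IB/RoCE transport selection, ring and tree construction (P2P/IPC within a node, NET/IB across nodes), ending in one "Init COMPLETE" per rank. As a rough, minimal sketch of how such output is produced (assuming a srun/torchrun-style launch that provides RANK, WORLD_SIZE, LOCAL_RANK, and the rendezvous address; NCCL_DEBUG and NCCL_DEBUG_SUBSYS are standard NCCL environment variables, nothing specific to this recipe):

    import os
    import torch
    import torch.distributed as dist

    # Standard NCCL debug switches; they must be set before the first collective.
    # NCCL_DEBUG=INFO is what makes NCCL print bootstrap/ring/tree lines like the ones above.
    os.environ.setdefault("NCCL_DEBUG", "INFO")
    os.environ.setdefault("NCCL_DEBUG_SUBSYS", "INIT")

    # Assumes the launcher set RANK/WORLD_SIZE/MASTER_ADDR/MASTER_PORT/LOCAL_RANK.
    dist.init_process_group(backend="nccl")
    torch.cuda.set_device(int(os.environ["LOCAL_RANK"]))
    dist.barrier()  # the first collective triggers communicator init ("Init COMPLETE")
    dist.destroy_process_group()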
+[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
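The reducer.cpp warning above is kept once here; in the raw log it is emitted verbatim by every one of the DDP worker processes. It refers to the find_unused_parameters argument of torch.nn.parallel.DistributedDataParallel. A minimal sketch of the setting it suggests, using a placeholder model rather than the actual S2T network (ESPnet passes this flag through its trainer options rather than hand-constructing DDP like this):

    import os
    import torch
    import torch.distributed as dist
    from torch.nn.parallel import DistributedDataParallel as DDP

    dist.init_process_group(backend="nccl")
    local_rank = int(os.environ["LOCAL_RANK"])  # set by the launcher
    torch.cuda.set_device(local_rank)

    model = torch.nn.Linear(80, 256).cuda(local_rank)  # placeholder for the S2T model
    ddp_model = DDP(
        model,
        device_ids=[local_rank],
        # The warning fires when this is True yet every parameter received a
        # gradient anyway; False skips the extra autograd-graph traversal per step.
        find_unused_parameters=False,
    )

Keeping the flag True remains correct when control flow really can leave parameters unused in some iterations, which the warning itself notes as a possible false positive.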
+[gpub005:0/64] 2023-07-07 20:14:03,233 (trainer:732) INFO: 23epoch:train:1-100batch: iter_time=1.234, forward_time=0.245, loss_ctc=73.225, loss_att=57.657, acc=0.705, loss=62.327, backward_time=1.047, grad_norm=105.098, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.184, optim0_lr0=7.714e-05, train_time=5.552 +[gpub005:0/64] 2023-07-07 20:16:18,832 (trainer:732) INFO: 23epoch:train:101-200batch: iter_time=1.217e-04, forward_time=0.142, loss_ctc=66.064, loss_att=55.513, acc=0.684, loss=58.678, backward_time=1.027, grad_norm=122.006, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.181, optim0_lr0=7.712e-05, train_time=2.713 +[gpub005:0/64] 2023-07-07 20:18:34,753 (trainer:732) INFO: 23epoch:train:201-300batch: iter_time=1.234e-04, forward_time=0.143, loss_ctc=92.044, loss_att=64.944, acc=0.703, loss=73.074, backward_time=1.025, grad_norm=143.859, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.180, optim0_lr0=7.711e-05, train_time=2.718 +[gpub005:0/64] 2023-07-07 20:20:49,996 (trainer:732) INFO: 23epoch:train:301-400batch: iter_time=1.276e-04, forward_time=0.143, loss_ctc=74.575, loss_att=60.629, acc=0.698, loss=64.813, backward_time=1.025, grad_norm=150.059, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.181, optim0_lr0=7.709e-05, train_time=2.705 +[gpub005:0/64] 2023-07-07 20:23:05,113 (trainer:732) INFO: 23epoch:train:401-500batch: iter_time=1.261e-04, forward_time=0.142, loss_ctc=80.331, loss_att=61.701, acc=0.708, loss=67.290, backward_time=1.024, grad_norm=120.003, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.181, optim0_lr0=7.707e-05, train_time=2.702 +[gpub005:0/64] 2023-07-07 20:25:19,924 (trainer:732) INFO: 23epoch:train:501-600batch: iter_time=1.247e-04, forward_time=0.142, loss_ctc=70.043, loss_att=53.869, acc=0.699, loss=58.721, backward_time=1.021, grad_norm=109.628, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.180, optim0_lr0=7.705e-05, train_time=2.696 +[gpub005:0/64] 2023-07-07 20:27:40,430 (trainer:732) INFO: 23epoch:train:601-700batch: iter_time=1.208e-04, forward_time=0.143, loss_ctc=83.967, loss_att=61.947, acc=0.692, loss=68.553, backward_time=1.033, grad_norm=141.886, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.180, optim0_lr0=7.703e-05, train_time=2.810 +[gpub005:0/64] 2023-07-07 20:30:06,403 (trainer:732) INFO: 23epoch:train:701-800batch: iter_time=1.237e-04, forward_time=0.142, loss_ctc=75.476, loss_att=56.916, acc=0.696, loss=62.484, backward_time=1.034, grad_norm=118.855, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.181, optim0_lr0=7.701e-05, train_time=2.919 +[gpub005:0/64] 2023-07-07 20:31:01,886 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
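In the per-100-batch trainer lines above, loss is consistent with ESPnet's hybrid CTC/attention objective, loss = w * loss_ctc + (1 - w) * loss_att with w = 0.3. The weight is inferred from the logged numbers, not read from the recipe's YAML, which remains authoritative. A quick check against the first entry:

    # Assumed hybrid objective: loss = ctc_weight*loss_ctc + (1-ctc_weight)*loss_att
    ctc_weight = 0.3                      # inferred from the log, not from the config
    loss_ctc, loss_att = 73.225, 57.657   # 23epoch:train:1-100batch above
    loss = ctc_weight * loss_ctc + (1 - ctc_weight) * loss_att
    print(f"{loss:.3f}")                  # -> 62.327, matching the logged loss=62.327

The same weight reproduces the other entries as well, e.g. 0.3*66.064 + 0.7*55.513 = 58.678 for batches 101-200.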
+[gpub005:0/64] 2023-07-07 20:31:19,436 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-07 20:31:22,919 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-07 20:31:22,919 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub005:0/64] 2023-07-07 20:31:22,925 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-07 20:37:15,137 (trainer:732) INFO: 23epoch:train:801-900batch: iter_time=1.375, forward_time=0.172, loss_ctc=72.566, loss_att=53.768, acc=0.698, loss=59.408, backward_time=1.047, grad_norm=114.089, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.182, optim0_lr0=7.700e-05, train_time=8.574 +[gpub005:0/64] 2023-07-07 20:39:31,293 (trainer:732) INFO: 23epoch:train:901-1000batch: iter_time=1.203e-04, forward_time=0.146, loss_ctc=65.125, loss_att=51.474, acc=0.694, loss=55.569, backward_time=1.026, grad_norm=100.552, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.182, optim0_lr0=7.698e-05, train_time=2.724 +[gpub005:0/64] 2023-07-07 20:41:47,156 (trainer:732) INFO: 23epoch:train:1001-1100batch: iter_time=1.348e-04, forward_time=0.146, loss_ctc=83.846, loss_att=65.257, acc=0.696, loss=70.834, backward_time=1.027, grad_norm=117.681, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.181, optim0_lr0=7.696e-05, train_time=2.717 +[gpub005:0/64] 2023-07-07 20:44:02,949 (trainer:732) INFO: 23epoch:train:1101-1200batch: iter_time=1.284e-04, forward_time=0.146, loss_ctc=75.335, loss_att=57.448, acc=0.703, loss=62.814, backward_time=1.027, grad_norm=97.808, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.181, optim0_lr0=7.694e-05, train_time=2.716 +[gpub005:0/64] 2023-07-07 20:46:19,227 (trainer:732) INFO: 23epoch:train:1201-1300batch: iter_time=1.213e-04, forward_time=0.147, loss_ctc=78.643, loss_att=64.187, acc=0.709, loss=68.524, backward_time=1.029, grad_norm=106.806, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.181, optim0_lr0=7.692e-05, train_time=2.725 +[gpub005:0/64] 2023-07-07 20:48:36,388 (trainer:732) INFO: 23epoch:train:1301-1400batch: iter_time=1.198e-04, forward_time=0.146, loss_ctc=67.582, loss_att=50.486, acc=0.715, loss=55.615, backward_time=1.026, grad_norm=134.933, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.181, optim0_lr0=7.690e-05, train_time=2.743 +[gpub005:0/64] 2023-07-07 20:50:52,551 (trainer:732) INFO: 23epoch:train:1401-1500batch: iter_time=1.192e-04, forward_time=0.146, loss_ctc=80.110, loss_att=60.648, acc=0.688, loss=66.487, backward_time=1.028, grad_norm=128.046, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.181, optim0_lr0=7.689e-05, train_time=2.723 +[gpub005:0/64] 2023-07-07 20:53:11,035 (trainer:732) INFO: 23epoch:train:1501-1600batch: iter_time=1.158e-04, forward_time=0.144, loss_ctc=70.534, loss_att=57.154, acc=0.699, loss=61.168, backward_time=1.027, grad_norm=102.436, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.180, 
optim0_lr0=7.687e-05, train_time=2.769 +[gpub005:0/64] 2023-07-07 20:54:56,286 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub005:0/64] 2023-07-07 20:55:14,037 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-07 20:55:17,531 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-07 20:55:17,531 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub005:0/64] 2023-07-07 20:55:17,538 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-07 21:00:02,783 (trainer:732) INFO: 23epoch:train:1601-1700batch: iter_time=2.623, forward_time=0.186, loss_ctc=70.658, loss_att=52.058, acc=0.699, loss=57.638, backward_time=1.034, grad_norm=90.957, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.183, optim0_lr0=7.685e-05, train_time=8.235 +[gpub005:0/64] 2023-07-07 21:02:18,930 (trainer:732) INFO: 23epoch:train:1701-1800batch: iter_time=1.258e-04, forward_time=0.146, loss_ctc=70.962, loss_att=57.990, acc=0.707, loss=61.882, backward_time=1.026, grad_norm=106.925, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.181, optim0_lr0=7.683e-05, train_time=2.723 +[gpub005:0/64] 2023-07-07 21:04:34,887 (trainer:732) INFO: 23epoch:train:1801-1900batch: iter_time=1.197e-04, forward_time=0.143, loss_ctc=73.987, loss_att=55.470, acc=0.695, loss=61.025, backward_time=1.025, grad_norm=128.929, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.181, optim0_lr0=7.681e-05, train_time=2.719 +[gpub005:0/64] 2023-07-07 21:06:50,806 (trainer:732) INFO: 23epoch:train:1901-2000batch: iter_time=1.175e-04, forward_time=0.145, loss_ctc=84.894, loss_att=66.262, acc=0.695, loss=71.851, backward_time=1.024, grad_norm=114.319, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.181, optim0_lr0=7.680e-05, train_time=2.718 +[gpub005:0/64] 2023-07-07 21:09:06,612 (trainer:732) INFO: 23epoch:train:2001-2100batch: iter_time=1.251e-04, forward_time=0.145, loss_ctc=72.489, loss_att=59.553, acc=0.708, loss=63.434, backward_time=1.026, grad_norm=101.371, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.678e-05, train_time=2.716 +[gpub005:0/64] 2023-07-07 21:11:22,119 (trainer:732) INFO: 23epoch:train:2101-2200batch: iter_time=1.171e-04, forward_time=0.144, loss_ctc=71.626, loss_att=54.390, acc=0.708, loss=59.561, backward_time=1.025, grad_norm=107.121, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.676e-05, train_time=2.710 +[gpub005:0/64] 2023-07-07 21:13:37,533 (trainer:732) INFO: 23epoch:train:2201-2300batch: iter_time=1.151e-04, forward_time=0.144, loss_ctc=79.447, loss_att=59.205, acc=0.696, loss=65.278, backward_time=1.023, grad_norm=121.579, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.674e-05, train_time=2.708 +[gpub005:0/64] 2023-07-07 21:15:53,132 (trainer:732) INFO: 23epoch:train:2301-2400batch: iter_time=1.182e-04, forward_time=0.144, loss_ctc=75.627, loss_att=58.332, 
acc=0.698, loss=63.520, backward_time=1.024, grad_norm=126.568, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.672e-05, train_time=2.712
+[gpub005:0/64] 2023-07-07 21:18:08,467 (trainer:732) INFO: 23epoch:train:2401-2500batch: iter_time=1.198e-04, forward_time=0.144, loss_ctc=65.804, loss_att=53.629, acc=0.695, loss=57.282, backward_time=1.023, grad_norm=93.403, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.670e-05, train_time=2.706
+[gpub005:0/64] 2023-07-07 21:18:11,282 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub005:0/64] 2023-07-07 21:18:29,130 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-07 21:18:32,569 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-07 21:18:32,569 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub005:0/64] 2023-07-07 21:18:32,576 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-07 21:24:42,763 (trainer:732) INFO: 23epoch:train:2501-2600batch: iter_time=1.242, forward_time=0.174, loss_ctc=73.983, loss_att=58.373, acc=0.706, loss=63.056, backward_time=1.034, grad_norm=100.591, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.183, optim0_lr0=7.669e-05, train_time=7.886
+[gpub005:0/64] 2023-07-07 21:26:58,297 (trainer:732) INFO: 23epoch:train:2601-2700batch: iter_time=1.190e-04, forward_time=0.144, loss_ctc=64.347, loss_att=52.941, acc=0.692, loss=56.362, backward_time=1.022, grad_norm=103.514, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.667e-05, train_time=2.710
+[gpub005:0/64] 2023-07-07 21:29:14,466 (trainer:732) INFO: 23epoch:train:2701-2800batch: iter_time=1.356e-04, forward_time=0.146, loss_ctc=86.482, loss_att=62.946, acc=0.706, loss=70.007, backward_time=1.028, grad_norm=116.532, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.665e-05, train_time=2.723
+[gpub005:0/64] 2023-07-07 21:31:30,282 (trainer:732) INFO: 23epoch:train:2801-2900batch: iter_time=1.271e-04, forward_time=0.145, loss_ctc=75.022, loss_att=60.111, acc=0.700, loss=64.585, backward_time=1.027, grad_norm=102.153, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.663e-05, train_time=2.716
+[gpub005:0/64] 2023-07-07 21:33:45,972 (trainer:732) INFO: 23epoch:train:2901-3000batch: iter_time=1.233e-04, forward_time=0.144, loss_ctc=79.575, loss_att=61.287, acc=0.710, loss=66.773, backward_time=1.024, grad_norm=116.778, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.661e-05, train_time=2.714
+[gpub005:0/64] 2023-07-07 21:36:01,542 (trainer:732) INFO: 23epoch:train:3001-3100batch: iter_time=1.329e-04, forward_time=0.144, loss_ctc=66.483, loss_att=51.229, acc=0.704, loss=55.805, backward_time=1.024, grad_norm=92.400, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.660e-05, train_time=2.711
+[gpub005:0/64] 2023-07-07 21:38:17,343 (trainer:732) INFO: 23epoch:train:3101-3200batch: iter_time=1.364e-04, forward_time=0.146, loss_ctc=78.366, loss_att=58.791, acc=0.696, loss=64.664, backward_time=1.027, grad_norm=123.479, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.658e-05, train_time=2.716
+[gpub005:0/64] 2023-07-07 21:40:32,848 (trainer:732) INFO: 23epoch:train:3201-3300batch: iter_time=1.406e-04, forward_time=0.144, loss_ctc=70.759, loss_att=54.241, acc=0.709, loss=59.197, backward_time=1.024, grad_norm=101.288, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.656e-05, train_time=2.710
+[gpub005:0/64] 2023-07-07 21:41:20,432 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub005:0/64] 2023-07-07 21:41:38,535 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-07 21:41:42,003 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-07 21:41:42,004 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub005:0/64] 2023-07-07 21:41:42,010 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-07 21:48:03,969 (trainer:732) INFO: 23epoch:train:3301-3400batch: iter_time=1.232, forward_time=0.144, loss_ctc=68.130, loss_att=52.958, acc=0.698, loss=57.509, backward_time=1.041, grad_norm=100.963, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.180, optim0_lr0=7.654e-05, train_time=9.022
+[gpub005:0/64] 2023-07-07 21:50:20,129 (trainer:732) INFO: 23epoch:train:3401-3500batch: iter_time=1.270e-04, forward_time=0.144, loss_ctc=69.763, loss_att=54.215, acc=0.711, loss=58.879, backward_time=1.024, grad_norm=98.620, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.653e-05, train_time=2.723
+[gpub005:0/64] 2023-07-07 21:52:37,246 (trainer:732) INFO: 23epoch:train:3501-3600batch: iter_time=1.350e-04, forward_time=0.146, loss_ctc=72.406, loss_att=56.129, acc=0.702, loss=61.013, backward_time=1.029, grad_norm=107.814, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.651e-05, train_time=2.742
+[gpub005:0/64] 2023-07-07 21:54:53,208 (trainer:732) INFO: 23epoch:train:3601-3700batch: iter_time=1.206e-04, forward_time=0.145, loss_ctc=85.578, loss_att=64.328, acc=0.701, loss=70.703, backward_time=1.026, grad_norm=95.956, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.649e-05, train_time=2.719
+[gpub005:0/64] 2023-07-07 21:57:09,044 (trainer:732) INFO: 23epoch:train:3701-3800batch: iter_time=1.089e-04, forward_time=0.145, loss_ctc=74.007, loss_att=59.821, acc=0.714, loss=64.077, backward_time=1.026, grad_norm=88.845, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.647e-05, train_time=2.716
+[gpub005:0/64] 2023-07-07 21:59:39,441 (trainer:732) INFO: 23epoch:train:3801-3900batch: iter_time=1.029e-04, forward_time=0.143, loss_ctc=67.054, loss_att=49.799, acc=0.712, loss=54.975, backward_time=1.033, grad_norm=97.210, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.645e-05, train_time=3.008
+[gpub005:0/64] 2023-07-07 22:01:56,581 (trainer:732) INFO: 23epoch:train:3901-4000batch: iter_time=1.073e-04, forward_time=0.145, loss_ctc=78.400, loss_att=56.731, acc=0.705, loss=63.232, backward_time=1.027, grad_norm=108.203, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.644e-05, train_time=2.743
+[gpub005:0/64] 2023-07-07 22:04:12,336 (trainer:732) INFO: 23epoch:train:4001-4100batch: iter_time=1.080e-04, forward_time=0.144, loss_ctc=74.332, loss_att=57.767, acc=0.700, loss=62.737, backward_time=1.025, grad_norm=119.370, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.642e-05, train_time=2.715
+[gpub005:0/64] 2023-07-07 22:05:44,331 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub005:0/64] 2023-07-07 22:06:02,568 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-07 22:06:05,982 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-07 22:06:05,982 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpub005:0/64] 2023-07-07 22:06:05,988 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-07 22:10:46,670 (trainer:732) INFO: 23epoch:train:4101-4200batch: iter_time=1.259, forward_time=0.154, loss_ctc=66.157, loss_att=48.178, acc=0.703, loss=53.572, backward_time=1.035, grad_norm=89.639, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.640e-05, train_time=7.886
+[gpub005:0/64] 2023-07-07 22:13:02,991 (trainer:732) INFO: 23epoch:train:4201-4300batch: iter_time=1.279e-04, forward_time=0.144, loss_ctc=69.203, loss_att=58.283, acc=0.704, loss=61.559, backward_time=1.026, grad_norm=86.447, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.638e-05, train_time=2.727
+[gpub005:0/64] 2023-07-07 22:15:18,474 (trainer:732) INFO: 23epoch:train:4301-4400batch: iter_time=1.017e-04, forward_time=0.143, loss_ctc=71.569, loss_att=54.837, acc=0.692, loss=59.857, backward_time=1.023, grad_norm=112.955, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.636e-05, train_time=2.709
+[gpub005:0/64] 2023-07-07 22:17:33,912 (trainer:732) INFO: 23epoch:train:4401-4500batch: iter_time=1.015e-04, forward_time=0.143, loss_ctc=83.345, loss_att=66.902, acc=0.686, loss=71.835, backward_time=1.023, grad_norm=107.092, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.635e-05, train_time=2.709
+[gpub005:0/64] 2023-07-07 22:19:49,701 (trainer:732) INFO: 23epoch:train:4501-4600batch: iter_time=1.031e-04, forward_time=0.144, loss_ctc=72.806, loss_att=58.981, acc=0.706, loss=63.129, backward_time=1.024, grad_norm=108.783, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.633e-05, train_time=2.716
+[gpub005:0/64] 2023-07-07 22:22:05,139 (trainer:732) INFO: 23epoch:train:4601-4700batch: iter_time=1.114e-04, forward_time=0.143, loss_ctc=71.685, loss_att=55.501, acc=0.700, loss=60.356, backward_time=1.022, grad_norm=110.110, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.631e-05, train_time=2.709
+[gpub005:0/64] 2023-07-07 22:24:20,485 (trainer:732) INFO: 23epoch:train:4701-4800batch: iter_time=1.072e-04, forward_time=0.143, loss_ctc=79.087, loss_att=57.787, acc=0.699, loss=64.177, backward_time=1.021, grad_norm=100.805, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.629e-05, train_time=2.707
+[gpub005:0/64] 2023-07-07 22:26:35,978 (trainer:732) INFO: 23epoch:train:4801-4900batch: iter_time=1.091e-04, forward_time=0.144, loss_ctc=73.503, loss_att=58.364, acc=0.689, loss=62.906, backward_time=1.023, grad_norm=109.600, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.628e-05, train_time=2.710
+[gpub005:0/64] 2023-07-07 22:28:51,076 (trainer:732) INFO: 23epoch:train:4901-5000batch: iter_time=1.202e-04, forward_time=0.143, loss_ctc=64.689, loss_att=54.362, acc=0.690, loss=57.460, backward_time=1.021, grad_norm=152.209, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.626e-05, train_time=2.702
+[gpub005:0/64] 2023-07-07 22:28:55,889 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub005:0/64] 2023-07-07 22:29:14,276 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-07 22:29:17,689 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-07 22:29:17,689 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpub005:0/64] 2023-07-07 22:29:17,695 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-07 22:37:00,414 (trainer:732) INFO: 23epoch:train:5001-5100batch: iter_time=1.337, forward_time=0.170, loss_ctc=73.081, loss_att=56.227, acc=0.704, loss=61.283, backward_time=1.034, grad_norm=153.240, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.624e-05, train_time=9.787
+[gpub005:0/64] 2023-07-07 22:39:16,136 (trainer:732) INFO: 23epoch:train:5101-5200batch: iter_time=1.064e-04, forward_time=0.145, loss_ctc=64.449, loss_att=53.017, acc=0.688, loss=56.447, backward_time=1.022, grad_norm=97.208, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.622e-05, train_time=2.714
+[gpub005:0/64] 2023-07-07 22:41:31,530 (trainer:732) INFO: 23epoch:train:5201-5300batch: iter_time=1.159e-04, forward_time=0.144, loss_ctc=84.647, loss_att=63.396, acc=0.696, loss=69.772, backward_time=1.022, grad_norm=129.990, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.620e-05, train_time=2.708
+[gpub005:0/64] 2023-07-07 22:43:47,077 (trainer:732) INFO: 23epoch:train:5301-5400batch: iter_time=1.131e-04, forward_time=0.145, loss_ctc=71.859, loss_att=58.614, acc=0.699, loss=62.588, backward_time=1.024, grad_norm=94.894, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.619e-05, train_time=2.711
+[gpub005:0/64] 2023-07-07 22:46:02,833 (trainer:732) INFO: 23epoch:train:5401-5500batch: iter_time=1.209e-04, forward_time=0.146, loss_ctc=77.229, loss_att=59.686, acc=0.711, loss=64.949, backward_time=1.026, grad_norm=107.512, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.182, optim0_lr0=7.617e-05, train_time=2.715
+[gpub005:0/64] 2023-07-07 22:48:18,143 (trainer:732) INFO: 23epoch:train:5501-5600batch: iter_time=1.116e-04, forward_time=0.144, loss_ctc=67.891, loss_att=54.609, acc=0.697, loss=58.593, backward_time=1.023, grad_norm=102.083, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.615e-05, train_time=2.706
+[gpub005:0/64] 2023-07-07 22:50:34,120 (trainer:732) INFO: 23epoch:train:5601-5700batch: iter_time=1.281e-04, forward_time=0.147, loss_ctc=77.159, loss_att=58.325, acc=0.695, loss=63.975, backward_time=1.027, grad_norm=116.273, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.613e-05, train_time=2.719
+[gpub005:0/64] 2023-07-07 22:52:49,277 (trainer:732) INFO: 23epoch:train:5701-5800batch: iter_time=1.458e-04, forward_time=0.144, loss_ctc=71.209, loss_att=54.625, acc=0.696, loss=59.601, backward_time=1.023, grad_norm=92.611, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.612e-05, train_time=2.703
+[gpub005:0/64] 2023-07-07 22:53:48,189 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub005:0/64] 2023-07-07 22:54:06,512 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-07 22:54:10,013 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-07 22:54:10,013 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub005:0/64] 2023-07-07 22:54:10,019 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-07 22:58:51,643 (trainer:732) INFO: 23epoch:train:5801-5900batch: iter_time=2.200, forward_time=0.169, loss_ctc=67.539, loss_att=50.843, acc=0.696, loss=55.852, backward_time=1.035, grad_norm=101.336, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.183, optim0_lr0=7.610e-05, train_time=7.247
+[gpub005:0/64] 2023-07-07 23:01:06,844 (trainer:732) INFO: 23epoch:train:5901-6000batch: iter_time=1.276e-04, forward_time=0.143, loss_ctc=69.923, loss_att=54.611, acc=0.709, loss=59.204, backward_time=1.021, grad_norm=89.503, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.181, optim0_lr0=7.608e-05, train_time=2.704
+[gpub005:0/64] 2023-07-07 23:03:22,210 (trainer:732) INFO: 23epoch:train:6001-6100batch: iter_time=1.177e-04, forward_time=0.143, loss_ctc=73.309, loss_att=56.219, acc=0.695, loss=61.346, backward_time=1.023, grad_norm=108.599, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.606e-05, train_time=2.707
+[gpub005:0/64] 2023-07-07 23:05:37,636 (trainer:732) INFO: 23epoch:train:6101-6200batch: iter_time=1.113e-04, forward_time=0.144, loss_ctc=82.117, loss_att=62.696, acc=0.695, loss=68.522, backward_time=1.023, grad_norm=97.686, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.605e-05, train_time=2.708
+[gpub005:0/64] 2023-07-07 23:07:53,122 (trainer:732) INFO: 23epoch:train:6201-6300batch: iter_time=1.115e-04, forward_time=0.143, loss_ctc=75.087, loss_att=60.028, acc=0.711, loss=64.546, backward_time=1.024, grad_norm=97.876, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.603e-05, train_time=2.709
+[gpub005:0/64] 2023-07-07 23:10:08,013 (trainer:732) INFO: 23epoch:train:6301-6400batch: iter_time=1.160e-04, forward_time=0.142, loss_ctc=66.502, loss_att=51.440, acc=0.704, loss=55.959, backward_time=1.020, grad_norm=103.605, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.601e-05, train_time=2.698
+[gpub005:0/64] 2023-07-07 23:12:24,456 (trainer:732) INFO: 23epoch:train:6401-6500batch: iter_time=1.169e-04, forward_time=0.143, loss_ctc=78.370, loss_att=56.212, acc=0.703, loss=62.859, backward_time=1.026, grad_norm=104.276, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.599e-05, train_time=2.729
+[gpub005:0/64] 2023-07-07 23:14:39,768 (trainer:732) INFO: 23epoch:train:6501-6600batch: iter_time=1.029e-04, forward_time=0.143, loss_ctc=75.661, loss_att=58.381, acc=0.689, loss=63.565, backward_time=1.022, grad_norm=120.247, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.180, optim0_lr0=7.598e-05, train_time=2.706
+[gpub005:0/64] 2023-07-07 23:16:16,616 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub005:0/64] 2023-07-07 23:16:34,573 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-07 23:16:37,987 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-07 23:16:37,987 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub005:0/64] 2023-07-07 23:16:37,993 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-07 23:21:16,629 (trainer:732) INFO: 23epoch:train:6601-6700batch: iter_time=1.778, forward_time=0.153, loss_ctc=64.231, loss_att=49.639, acc=0.702, loss=54.016, backward_time=1.033, grad_norm=111.751, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.596e-05, train_time=7.937
+[gpub005:0/64] 2023-07-07 23:23:32,863 (trainer:732) INFO: 23epoch:train:6701-6800batch: iter_time=1.291e-04, forward_time=0.144, loss_ctc=69.473, loss_att=58.857, acc=0.712, loss=62.042, backward_time=1.027, grad_norm=95.279, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.182, optim0_lr0=7.594e-05, train_time=2.724
+[gpub005:0/64] 2023-07-07 23:25:48,593 (trainer:732) INFO: 23epoch:train:6801-6900batch: iter_time=1.190e-04, forward_time=0.145, loss_ctc=67.596, loss_att=53.379, acc=0.699, loss=57.644, backward_time=1.025, grad_norm=117.466, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.182, optim0_lr0=7.592e-05, train_time=2.714
+[gpub005:0/64] 2023-07-07 23:28:04,655 (trainer:732) INFO: 23epoch:train:6901-7000batch: iter_time=1.241e-04, forward_time=0.145, loss_ctc=86.976, loss_att=65.598, acc=0.698, loss=72.012, backward_time=1.028, grad_norm=129.974, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.591e-05, train_time=2.721
+[gpub005:0/64] 2023-07-07 23:30:20,292 (trainer:732) INFO: 23epoch:train:7001-7100batch: iter_time=1.137e-04, forward_time=0.145, loss_ctc=67.406, loss_att=49.368, acc=0.731, loss=54.780, backward_time=1.026, grad_norm=112.193, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.589e-05, train_time=2.713
+[gpub005:0/64] 2023-07-07 23:32:36,124 (trainer:732) INFO: 23epoch:train:7101-7200batch: iter_time=1.158e-04, forward_time=0.145, loss_ctc=76.719, loss_att=60.990, acc=0.711, loss=65.708, backward_time=1.026, grad_norm=110.570, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.587e-05, train_time=2.716
+[gpub005:0/64] 2023-07-07 23:34:51,928 (trainer:732) INFO: 23epoch:train:7201-7300batch: iter_time=1.130e-04, forward_time=0.145, loss_ctc=74.869, loss_att=56.094, acc=0.706, loss=61.727, backward_time=1.025, grad_norm=110.042, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.585e-05, train_time=2.716
+[gpub005:0/64] 2023-07-07 23:37:07,590 (trainer:732) INFO: 23epoch:train:7301-7400batch: iter_time=1.190e-04, forward_time=0.144, loss_ctc=73.682, loss_att=57.404, acc=0.694, loss=62.287, backward_time=1.025, grad_norm=131.163, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.584e-05, train_time=2.713
+[gpub005:0/64] 2023-07-07 23:39:22,887 (trainer:732) INFO: 23epoch:train:7401-7500batch: iter_time=1.090e-04, forward_time=0.145, loss_ctc=66.339, loss_att=49.969, acc=0.710, loss=54.880, backward_time=1.023, grad_norm=103.995, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.582e-05, train_time=2.706
+[gpub005:0/64] 2023-07-07 23:39:24,295 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub005:0/64] 2023-07-07 23:39:42,594 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-07 23:39:46,092 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-07 23:39:46,092 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpub005:0/64] 2023-07-07 23:39:46,098 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-07 23:45:40,419 (trainer:732) INFO: 23epoch:train:7501-7600batch: iter_time=1.251, forward_time=0.163, loss_ctc=71.968, loss_att=56.021, acc=0.710, loss=60.805, backward_time=1.036, grad_norm=115.094, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.182, optim0_lr0=7.580e-05, train_time=7.550
+[gpub005:0/64] 2023-07-07 23:47:56,185 (trainer:732) INFO: 23epoch:train:7601-7700batch: iter_time=1.157e-04, forward_time=0.144, loss_ctc=64.028, loss_att=53.023, acc=0.693, loss=56.324, backward_time=1.023, grad_norm=96.768, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.182, optim0_lr0=7.578e-05, train_time=2.715
+[gpub005:0/64] 2023-07-07 23:50:12,012 (trainer:732) INFO: 23epoch:train:7701-7800batch: iter_time=1.221e-04, forward_time=0.144, loss_ctc=82.929, loss_att=60.230, acc=0.703, loss=67.040, backward_time=1.027, grad_norm=101.875, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.182, optim0_lr0=7.577e-05, train_time=2.716
+[gpub005:0/64] 2023-07-07 23:52:27,130 (trainer:732) INFO: 23epoch:train:7801-7900batch: iter_time=1.379e-04, forward_time=0.144, loss_ctc=73.151, loss_att=59.489, acc=0.697, loss=63.588, backward_time=1.021, grad_norm=94.433, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.575e-05, train_time=2.702
+[gpub005:0/64] 2023-07-07 23:54:42,527 (trainer:732) INFO: 23epoch:train:7901-8000batch: iter_time=1.207e-04, forward_time=0.144, loss_ctc=77.527, loss_att=59.793, acc=0.707, loss=65.113, backward_time=1.024, grad_norm=97.630, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.573e-05, train_time=2.708
+[gpub005:0/64] 2023-07-07 23:56:57,972 (trainer:732) INFO: 23epoch:train:8001-8100batch: iter_time=1.089e-04, forward_time=0.143, loss_ctc=67.062, loss_att=52.670, acc=0.703, loss=56.987, backward_time=1.024, grad_norm=91.986, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.571e-05, train_time=2.709
+[gpub005:0/64] 2023-07-07 23:59:13,305 (trainer:732) INFO: 23epoch:train:8101-8200batch: iter_time=1.038e-04, forward_time=0.144, loss_ctc=77.397, loss_att=56.849, acc=0.699, loss=63.014, backward_time=1.023, grad_norm=101.149, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.182, optim0_lr0=7.570e-05, train_time=2.706
+[gpub005:0/64] 2023-07-08 00:01:28,395 (trainer:732) INFO: 23epoch:train:8201-8300batch: iter_time=1.169e-04, forward_time=0.143, loss_ctc=70.457, loss_att=54.556, acc=0.695, loss=59.327, backward_time=1.022, grad_norm=87.648, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.568e-05, train_time=2.702
+[gpub005:0/64] 2023-07-08 00:02:14,379 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub005:0/64] 2023-07-08 00:02:33,090 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 00:02:36,612 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-08 00:02:36,612 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub005:0/64] 2023-07-08 00:02:36,618 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 00:08:27,251 (trainer:732) INFO: 23epoch:train:8301-8400batch: iter_time=1.283, forward_time=0.155, loss_ctc=69.792, loss_att=51.702, acc=0.705, loss=57.129, backward_time=1.036, grad_norm=94.302, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.183, optim0_lr0=7.566e-05, train_time=8.377
+[gpub005:0/64] 2023-07-08 00:10:43,043 (trainer:732) INFO: 23epoch:train:8401-8500batch: iter_time=1.183e-04, forward_time=0.143, loss_ctc=64.409, loss_att=49.778, acc=0.698, loss=54.167, backward_time=1.024, grad_norm=88.422, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.564e-05, train_time=2.716
+[gpub005:0/64] 2023-07-08 00:12:58,756 (trainer:732) INFO: 23epoch:train:8501-8600batch: iter_time=1.104e-04, forward_time=0.143, loss_ctc=80.753, loss_att=62.383, acc=0.696, loss=67.894, backward_time=1.023, grad_norm=120.710, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.563e-05, train_time=2.714
+[gpub005:0/64] 2023-07-08 00:15:15,421 (trainer:732) INFO: 23epoch:train:8601-8700batch: iter_time=1.104e-04, forward_time=0.145, loss_ctc=72.795, loss_att=55.598, acc=0.703, loss=60.757, backward_time=1.026, grad_norm=94.499, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.182, optim0_lr0=7.561e-05, train_time=2.733
+[gpub005:0/64] 2023-07-08 00:17:31,354 (trainer:732) INFO: 23epoch:train:8701-8800batch: iter_time=1.037e-04, forward_time=0.145, loss_ctc=76.197, loss_att=62.131, acc=0.706, loss=66.351, backward_time=1.028, grad_norm=103.489, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.182, optim0_lr0=7.559e-05, train_time=2.718
+[gpub005:0/64] 2023-07-08 00:19:46,862 (trainer:732) INFO: 23epoch:train:8801-8900batch: iter_time=1.102e-04, forward_time=0.144, loss_ctc=66.628, loss_att=51.943, acc=0.710, loss=56.349, backward_time=1.025, grad_norm=99.280, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.182, optim0_lr0=7.558e-05, train_time=2.710
+[gpub005:0/64] 2023-07-08 00:22:10,051 (trainer:732) INFO: 23epoch:train:8901-9000batch: iter_time=1.057e-04, forward_time=0.145, loss_ctc=76.575, loss_att=56.267, acc=0.696, loss=62.360, backward_time=1.039, grad_norm=108.949, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.182, optim0_lr0=7.556e-05, train_time=2.864
+[gpub005:0/64] 2023-07-08 00:24:25,632 (trainer:732) INFO: 23epoch:train:9001-9100batch: iter_time=9.660e-05, forward_time=0.144, loss_ctc=69.003, loss_att=54.450, acc=0.697, loss=58.816, backward_time=1.024, grad_norm=89.585, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.182, optim0_lr0=7.554e-05, train_time=2.711
+[gpub005:0/64] 2023-07-08 00:25:57,557 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub005:0/64] 2023-07-08 00:26:15,971 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 00:26:19,431 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-08 00:26:19,432 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub005:0/64] 2023-07-08 00:26:19,438 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 00:30:19,119 (trainer:732) INFO: 23epoch:train:9101-9200batch: iter_time=1.292, forward_time=0.171, loss_ctc=67.027, loss_att=52.830, acc=0.700, loss=57.089, backward_time=1.038, grad_norm=110.918, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.182, optim0_lr0=7.552e-05, train_time=7.069
+[gpub005:0/64] 2023-07-08 00:32:58,750 (trainer:732) INFO: 23epoch:train:9201-9300batch: iter_time=1.301e-04, forward_time=0.145, loss_ctc=68.474, loss_att=55.236, acc=0.708, loss=59.208, backward_time=1.045, grad_norm=95.325, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.182, optim0_lr0=7.551e-05, train_time=3.193
+[gpub005:0/64] 2023-07-08 00:35:17,283 (trainer:732) INFO: 23epoch:train:9301-9400batch: iter_time=1.271e-04, forward_time=0.144, loss_ctc=67.455, loss_att=52.020, acc=0.693, loss=56.651, backward_time=1.026, grad_norm=96.518, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.549e-05, train_time=2.770
+[gpub005:0/64] 2023-07-08 00:37:36,935 (trainer:732) INFO: 23epoch:train:9401-9500batch: iter_time=1.194e-04, forward_time=0.146, loss_ctc=85.642, loss_att=64.226, acc=0.699, loss=70.651, backward_time=1.034, grad_norm=105.352, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.547e-05, train_time=2.793
+[gpub005:0/64] 2023-07-08 00:40:08,217 (trainer:732) INFO: 23epoch:train:9501-9600batch: iter_time=1.427e-04, forward_time=0.143, loss_ctc=65.434, loss_att=49.351, acc=0.723, loss=54.176, backward_time=1.050, grad_norm=100.205, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.545e-05, train_time=3.025
+[gpub005:0/64] 2023-07-08 00:42:32,011 (trainer:732) INFO: 23epoch:train:9601-9700batch: iter_time=1.393e-04, forward_time=0.145, loss_ctc=75.234, loss_att=58.800, acc=0.713, loss=63.730, backward_time=1.038, grad_norm=121.320, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.544e-05, train_time=2.876
+[gpub005:0/64] 2023-07-08 00:44:48,244 (trainer:732) INFO: 23epoch:train:9701-9800batch: iter_time=1.236e-04, forward_time=0.143, loss_ctc=74.479, loss_att=58.067, acc=0.696, loss=62.991, backward_time=1.022, grad_norm=101.161, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.542e-05, train_time=2.724
+[gpub005:0/64] 2023-07-08 00:47:03,649 (trainer:732) INFO: 23epoch:train:9801-9900batch: iter_time=1.282e-04, forward_time=0.144, loss_ctc=72.052, loss_att=55.612, acc=0.692, loss=60.544, backward_time=1.023, grad_norm=96.421, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.181, optim0_lr0=7.540e-05, train_time=2.708
+[gpub005:0/64] 2023-07-08 00:49:18,598 (trainer:732) INFO: 23epoch:train:9901-10000batch: iter_time=1.172e-04, forward_time=0.143, loss_ctc=65.433, loss_att=49.258, acc=0.705, loss=54.110, backward_time=1.020, grad_norm=96.484, clip=100.000, loss_scale=1.181e+21, optim_step_time=0.180, optim0_lr0=7.539e-05, train_time=2.699
+[gpub005:0/64] 2023-07-08 01:02:22,452 (trainer:338) INFO: 23epoch results: [train] iter_time=0.181, forward_time=0.147, loss_ctc=73.369, loss_att=56.779, acc=0.701, loss=61.756, backward_time=1.027, grad_norm=108.052, clip=100.000, loss_scale=7.674e+20, optim_step_time=0.181, optim0_lr0=7.625e-05, train_time=3.358, time=4 hours, 40 minutes and 12.58 seconds, total_count=200000, gpu_max_cached_mem_GB=34.934, [valid] loss_ctc=49.622, cer_ctc=0.280, loss_att=39.678, acc=0.677, cer=0.355, wer=0.989, loss=42.661, time=6 minutes and 51.55 seconds, total_count=20746, gpu_max_cached_mem_GB=38.229, [att_plot] time=5 minutes and 52.75 seconds, total_count=0, gpu_max_cached_mem_GB=38.229
+[gpub005:0/64] 2023-07-08 01:02:38,315 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count
+[gpub005:0/64] 2023-07-08 01:02:38,323 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/15epoch.pth
+[gpub005:0/64] 2023-07-08 01:02:38,323 (trainer:272) INFO: 24/30epoch started. Estimated time to finish: 1 day, 10 hours and 12 minutes
+[gpub005:0/64] 2023-07-08 01:02:38,327 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub005:0/64] 2023-07-08 01:02:56,639 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 01:03:00,105 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-08 01:03:00,105 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub005:0/64] 2023-07-08 01:03:00,112 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 01:07:13,542 (trainer:732) INFO: 24epoch:train:1-100batch: iter_time=1.294, forward_time=0.197, loss_ctc=66.378, loss_att=54.002, acc=0.692, loss=57.715, backward_time=1.047, grad_norm=88.560, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.185, optim0_lr0=7.537e-05, train_time=5.504
+[gpub005:0/64] 2023-07-08 01:09:29,832 (trainer:732) INFO: 24epoch:train:101-200batch: iter_time=1.237e-04, forward_time=0.149, loss_ctc=75.563, loss_att=57.087, acc=0.702, loss=62.630, backward_time=1.029, grad_norm=107.763, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.535e-05, train_time=2.726
+[gpub005:0/64] 2023-07-08 01:11:47,280 (trainer:732) INFO: 24epoch:train:201-300batch: iter_time=1.254e-04, forward_time=0.156, loss_ctc=81.651, loss_att=60.133, acc=0.708, loss=66.588, backward_time=1.031, grad_norm=101.534, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.185, optim0_lr0=7.533e-05, train_time=2.749
+[gpub005:0/64] 2023-07-08 01:14:08,521 (trainer:732) INFO: 24epoch:train:301-400batch: iter_time=1.208e-04, forward_time=0.151, loss_ctc=74.333, loss_att=59.376, acc=0.682, loss=63.863, backward_time=1.045, grad_norm=90.752, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.186, optim0_lr0=7.532e-05, train_time=2.825
+[gpub005:0/64] 2023-07-08 01:16:26,875 (trainer:732) INFO: 24epoch:train:401-500batch: iter_time=1.294e-04, forward_time=0.145, loss_ctc=76.016, loss_att=57.684, acc=0.693, loss=63.183, backward_time=1.029, grad_norm=111.603, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.530e-05, train_time=2.767
+[gpub005:0/64] 2023-07-08 01:18:46,441 (trainer:732) INFO: 24epoch:train:501-600batch: iter_time=1.166e-04, forward_time=0.145, loss_ctc=75.027, loss_att=55.807, acc=0.692, loss=61.573, backward_time=1.033, grad_norm=97.334, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.528e-05, train_time=2.791
+[gpub005:0/64] 2023-07-08 01:21:21,783 (trainer:732) INFO: 24epoch:train:601-700batch: iter_time=1.134e-04, forward_time=0.168, loss_ctc=84.656, loss_att=61.793, acc=0.683, loss=68.652, backward_time=1.043, grad_norm=104.486, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.527e-05, train_time=3.107
+[gpub005:0/64] 2023-07-08 01:23:42,605 (trainer:732) INFO: 24epoch:train:701-800batch: iter_time=1.217e-04, forward_time=0.145, loss_ctc=66.495, loss_att=52.472, acc=0.706, loss=56.679, backward_time=1.032, grad_norm=118.761, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.525e-05, train_time=2.816
+[gpub005:0/64] 2023-07-08 01:24:45,692 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpub005:0/64] 2023-07-08 01:25:03,063 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 01:25:06,403 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-08 01:25:06,403 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpub005:0/64] 2023-07-08 01:25:06,447 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 01:31:05,985 (trainer:732) INFO: 24epoch:train:801-900batch: iter_time=2.913, forward_time=0.166, loss_ctc=70.482, loss_att=54.429, acc=0.689, loss=59.245, backward_time=1.040, grad_norm=93.942, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.184, optim0_lr0=7.523e-05, train_time=8.867
+[gpub005:0/64] 2023-07-08 01:33:23,595 (trainer:732) INFO: 24epoch:train:901-1000batch: iter_time=1.271e-04, forward_time=0.146, loss_ctc=74.153, loss_att=57.876, acc=0.710, loss=62.759, backward_time=1.032, grad_norm=94.097, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.522e-05, train_time=2.752
+[gpub005:0/64] 2023-07-08 01:35:39,445 (trainer:732) INFO: 24epoch:train:1001-1100batch: iter_time=1.270e-04, forward_time=0.146, loss_ctc=82.574, loss_att=61.700, acc=0.704, loss=67.962, backward_time=1.028, grad_norm=112.668, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.520e-05, train_time=2.717
+[gpub005:0/64] 2023-07-08 01:37:55,479 (trainer:732) INFO: 24epoch:train:1101-1200batch: iter_time=1.277e-04, forward_time=0.147, loss_ctc=72.525, loss_att=54.759, acc=0.693, loss=60.089, backward_time=1.029, grad_norm=96.496, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.518e-05, train_time=2.720
+[gpub005:0/64] 2023-07-08 01:40:11,488 (trainer:732) INFO: 24epoch:train:1201-1300batch: iter_time=1.179e-04, forward_time=0.146, loss_ctc=70.836, loss_att=59.985, acc=0.697, loss=63.240, backward_time=1.029, grad_norm=86.809, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.516e-05, train_time=2.720
+[gpub005:0/64] 2023-07-08 01:42:27,421 (trainer:732) INFO: 24epoch:train:1301-1400batch: iter_time=1.202e-04, forward_time=0.147, loss_ctc=77.993, loss_att=57.963, acc=0.699, loss=63.972, backward_time=1.028, grad_norm=95.498, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.515e-05, train_time=2.718
+[gpub005:0/64] 2023-07-08 01:44:43,520 (trainer:732) INFO: 24epoch:train:1401-1500batch: iter_time=1.235e-04, forward_time=0.148, loss_ctc=81.518, loss_att=62.066, acc=0.696, loss=67.902, backward_time=1.030, grad_norm=106.767, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.513e-05, train_time=2.722
+[gpub005:0/64] 2023-07-08 01:46:59,478 (trainer:732) INFO: 24epoch:train:1501-1600batch: iter_time=1.160e-04, forward_time=0.147, loss_ctc=66.753, loss_att=53.187, acc=0.700, loss=57.257, backward_time=1.028, grad_norm=93.450, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.184, optim0_lr0=7.511e-05, train_time=2.719
+[gpub005:0/64] 2023-07-08 01:48:35,879 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub005:0/64] 2023-07-08 01:48:54,179 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 01:48:57,625 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-08 01:48:57,626 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub005:0/64] 2023-07-08 01:48:57,632 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 01:52:29,456 (trainer:732) INFO: 24epoch:train:1601-1700batch: iter_time=1.330, forward_time=0.147, loss_ctc=65.681, loss_att=48.919, acc=0.702, loss=53.948, backward_time=1.042, grad_norm=84.444, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.510e-05, train_time=6.599
+[gpub005:0/64] 2023-07-08 01:54:45,379 (trainer:732) INFO: 24epoch:train:1701-1800batch: iter_time=1.307e-04, forward_time=0.145, loss_ctc=74.623, loss_att=57.753, acc=0.695, loss=62.814, backward_time=1.027, grad_norm=97.777, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.508e-05, train_time=2.718
+[gpub005:0/64] 2023-07-08 01:57:01,045 (trainer:732) INFO: 24epoch:train:1801-1900batch: iter_time=1.223e-04, forward_time=0.145, loss_ctc=76.935, loss_att=56.373, acc=0.708, loss=62.542, backward_time=1.026, grad_norm=120.929, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.506e-05, train_time=2.713
+[gpub005:0/64] 2023-07-08 01:59:16,723 (trainer:732) INFO: 24epoch:train:1901-2000batch: iter_time=1.132e-04, forward_time=0.147, loss_ctc=71.092, loss_att=51.859, acc=0.702, loss=57.629, backward_time=1.027, grad_norm=100.553, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.505e-05, train_time=2.713
+[gpub005:0/64] 2023-07-08 02:01:32,452 (trainer:732) INFO: 24epoch:train:2001-2100batch: iter_time=1.238e-04, forward_time=0.146, loss_ctc=72.789, loss_att=61.846, acc=0.686, loss=65.129, backward_time=1.026, grad_norm=98.884, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.503e-05, train_time=2.714
+[gpub005:0/64] 2023-07-08 02:03:48,170 (trainer:732) INFO: 24epoch:train:2101-2200batch: iter_time=1.204e-04, forward_time=0.145, loss_ctc=78.081, loss_att=55.115, acc=0.697, loss=62.005, backward_time=1.027, grad_norm=100.800, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.501e-05, train_time=2.714
+[gpub005:0/64] 2023-07-08 02:06:03,949 (trainer:732) INFO: 24epoch:train:2201-2300batch: iter_time=1.177e-04, forward_time=0.146, loss_ctc=80.049, loss_att=63.948, acc=0.678, loss=68.779, backward_time=1.026, grad_norm=114.705, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.499e-05, train_time=2.715
+[gpub005:0/64] 2023-07-08 02:08:19,737 (trainer:732) INFO: 24epoch:train:2301-2400batch: iter_time=1.194e-04, forward_time=0.146, loss_ctc=65.402, loss_att=53.461, acc=0.704, loss=57.043, backward_time=1.028, grad_norm=91.034, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.498e-05, train_time=2.716
+[gpub005:0/64] 2023-07-08 02:10:35,472 (trainer:732) INFO: 24epoch:train:2401-2500batch: iter_time=1.229e-04, forward_time=0.146, loss_ctc=72.613, loss_att=52.477, acc=0.698, loss=58.518, backward_time=1.026, grad_norm=99.117, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.496e-05, train_time=2.714
+[gpub005:0/64] 2023-07-08 02:10:37,997 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub005:0/64] 2023-07-08 02:10:56,202 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 02:10:59,652 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-08 02:10:59,652 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpub005:0/64] 2023-07-08 02:10:59,658 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 02:16:07,690 (trainer:732) INFO: 24epoch:train:2501-2600batch: iter_time=1.217, forward_time=0.174, loss_ctc=63.381, loss_att=52.778, acc=0.701, loss=55.959, backward_time=1.043, grad_norm=89.376, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.494e-05, train_time=6.644
+[gpub005:0/64] 2023-07-08 02:18:23,643 (trainer:732) INFO: 24epoch:train:2601-2700batch: iter_time=1.350e-04, forward_time=0.146, loss_ctc=75.419, loss_att=55.105, acc=0.702, loss=61.199, backward_time=1.026, grad_norm=92.859, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.493e-05, train_time=2.719
+[gpub005:0/64] 2023-07-08 02:20:39,387 (trainer:732) INFO: 24epoch:train:2701-2800batch: iter_time=1.200e-04, forward_time=0.145, loss_ctc=80.075, loss_att=59.644, acc=0.711, loss=65.773, backward_time=1.028, grad_norm=92.749, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.491e-05, train_time=2.715
+[gpub005:0/64] 2023-07-08 02:22:55,022 (trainer:732) INFO: 24epoch:train:2801-2900batch: iter_time=1.190e-04, forward_time=0.145, loss_ctc=74.639, loss_att=60.582, acc=0.683, loss=64.799, backward_time=1.027, grad_norm=100.800, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.489e-05, train_time=2.712
+[gpub005:0/64] 2023-07-08 02:25:10,817 (trainer:732) INFO: 24epoch:train:2901-3000batch: iter_time=1.153e-04, forward_time=0.147, loss_ctc=72.616, loss_att=55.817, acc=0.697, loss=60.856, backward_time=1.027, grad_norm=106.716, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.488e-05, train_time=2.716
+[gpub005:0/64] 2023-07-08 02:27:26,850 (trainer:732) INFO: 24epoch:train:3001-3100batch: iter_time=1.242e-04, forward_time=0.147, loss_ctc=73.982, loss_att=56.264, acc=0.692, loss=61.579, backward_time=1.030, grad_norm=106.373, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.486e-05, train_time=2.720
+[gpub005:0/64] 2023-07-08 02:29:42,652 (trainer:732) INFO: 24epoch:train:3101-3200batch: iter_time=1.253e-04, forward_time=0.146, loss_ctc=79.191, loss_att=60.052, acc=0.687, loss=65.793, backward_time=1.029, grad_norm=123.443, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.484e-05, train_time=2.716
+[gpub005:0/64] 2023-07-08 02:31:58,456 (trainer:732) INFO: 24epoch:train:3201-3300batch: iter_time=1.143e-04, forward_time=0.146, loss_ctc=64.577, loss_att=50.815, acc=0.710, loss=54.944, backward_time=1.027, grad_norm=96.139, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.483e-05, train_time=2.716
+[gpub005:0/64] 2023-07-08 02:32:57,662 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub005:0/64] 2023-07-08 02:33:16,507 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 02:33:19,930 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-08 02:33:19,930 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub005:0/64] 2023-07-08 02:33:19,936 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 02:39:02,932 (trainer:732) INFO: 24epoch:train:3301-3400batch: iter_time=1.245, forward_time=0.157, loss_ctc=68.788, loss_att=57.316, acc=0.692, loss=60.757, backward_time=1.054, grad_norm=97.600, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.184, optim0_lr0=7.481e-05, train_time=8.489
+[gpub005:0/64] 2023-07-08 02:41:19,229 (trainer:732) INFO: 24epoch:train:3401-3500batch: iter_time=1.235e-04, forward_time=0.147, loss_ctc=72.583, loss_att=54.522, acc=0.715, loss=59.940, backward_time=1.029, grad_norm=105.703, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.479e-05, train_time=2.726
+[gpub005:0/64] 2023-07-08 02:43:35,703 (trainer:732) INFO: 24epoch:train:3501-3600batch: iter_time=1.213e-04, forward_time=0.146, loss_ctc=77.004, loss_att=55.591, acc=0.714, loss=62.015, backward_time=1.028, grad_norm=112.170, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.478e-05, train_time=2.729
+[gpub005:0/64] 2023-07-08 02:45:51,850 (trainer:732) INFO: 24epoch:train:3601-3700batch: iter_time=1.155e-04, forward_time=0.146, loss_ctc=71.437, loss_att=55.246, acc=0.700, loss=60.103, backward_time=1.029, grad_norm=102.536, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.476e-05, train_time=2.723
+[gpub005:0/64] 2023-07-08 02:48:08,047 (trainer:732) INFO: 24epoch:train:3701-3800batch: iter_time=1.184e-04, forward_time=0.146, loss_ctc=73.706, loss_att=63.883, acc=0.693, loss=66.830, backward_time=1.029, grad_norm=94.452, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.474e-05, train_time=2.724
+[gpub005:0/64] 2023-07-08 02:50:24,069 (trainer:732) INFO: 24epoch:train:3801-3900batch: iter_time=1.117e-04, forward_time=0.146, loss_ctc=75.479, loss_att=54.094, acc=0.707, loss=60.510, backward_time=1.029, grad_norm=97.153, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.473e-05, train_time=2.720
+[gpub005:0/64] 2023-07-08 02:52:39,889 (trainer:732) INFO: 24epoch:train:3901-4000batch: iter_time=1.159e-04, forward_time=0.145, loss_ctc=78.941, loss_att=58.000, acc=0.700, loss=64.282, backward_time=1.027, grad_norm=111.973, clip=100.000, loss_scale=2.361e+21, optim_step_time=0.183, optim0_lr0=7.471e-05, train_time=2.716
+[gpub005:0/64] 2023-07-08 02:54:55,730 (trainer:732) INFO: 24epoch:train:4001-4100batch: iter_time=1.273e-04, forward_time=0.146, loss_ctc=66.834, loss_att=54.123, acc=0.701, loss=57.936, backward_time=1.027, grad_norm=96.300, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.469e-05, train_time=2.717
+[gpub005:0/64] 2023-07-08 02:56:27,946 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub005:0/64] 2023-07-08 02:56:45,945 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 02:56:49,390 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-08 02:56:49,390 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub005:0/64] 2023-07-08 02:56:49,396 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 03:01:09,171 (trainer:732) INFO: 24epoch:train:4101-4200batch: iter_time=1.248, forward_time=0.161, loss_ctc=71.173, loss_att=57.787, acc=0.718, loss=61.802, backward_time=1.042, grad_norm=102.778, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.184, optim0_lr0=7.468e-05, train_time=7.469
+[gpub005:0/64] 2023-07-08 03:03:25,586 (trainer:732) INFO: 24epoch:train:4201-4300batch: iter_time=1.201e-04, forward_time=0.146, loss_ctc=66.106, loss_att=52.017, acc=0.708, loss=56.244, backward_time=1.033, grad_norm=89.218, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.466e-05, train_time=2.728
+[gpub005:0/64] 2023-07-08 03:05:41,834 (trainer:732) INFO: 24epoch:train:4301-4400batch: iter_time=1.221e-04, forward_time=0.146, loss_ctc=71.008, loss_att=49.565, acc=0.721, loss=55.998, backward_time=1.030, grad_norm=101.755, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.464e-05, train_time=2.725
+[gpub005:0/64] 2023-07-08 03:07:57,995 (trainer:732) INFO: 24epoch:train:4401-4500batch: iter_time=1.142e-04, forward_time=0.147, loss_ctc=77.163, loss_att=57.856, acc=0.709, loss=63.648, backward_time=1.029, grad_norm=109.828, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.463e-05, train_time=2.723
+[gpub005:0/64] 2023-07-08 03:10:13,988 (trainer:732) INFO: 24epoch:train:4501-4600batch: iter_time=1.274e-04, forward_time=0.146, loss_ctc=76.375, loss_att=64.397, acc=0.699, loss=67.990, backward_time=1.029, grad_norm=107.384, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.461e-05, train_time=2.720
+[gpub005:0/64] 2023-07-08 03:12:30,013 (trainer:732) INFO: 24epoch:train:4601-4700batch: iter_time=1.345e-04, forward_time=0.148, loss_ctc=70.244, loss_att=51.300, acc=0.711, loss=56.983, backward_time=1.029, grad_norm=100.325, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.184, optim0_lr0=7.459e-05, train_time=2.720
+[gpub005:0/64] 2023-07-08 03:14:46,161 (trainer:732) INFO: 24epoch:train:4701-4800batch: iter_time=1.171e-04, forward_time=0.146, loss_ctc=77.063, loss_att=60.448, acc=0.691, loss=65.433, backward_time=1.031, grad_norm=99.011, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.458e-05, train_time=2.723
+[gpub005:0/64] 2023-07-08 03:17:07,246 (trainer:732) INFO: 24epoch:train:4801-4900batch: iter_time=1.221e-04, forward_time=0.145, loss_ctc=72.573, loss_att=53.899, acc=0.709, loss=59.501, backward_time=1.033, grad_norm=110.928, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.456e-05, train_time=2.821
+[gpub005:0/64] 2023-07-08 03:19:26,086 (trainer:732) INFO: 24epoch:train:4901-5000batch: iter_time=1.179e-04, forward_time=0.146, loss_ctc=71.710, loss_att=56.838, acc=0.706, loss=61.300, backward_time=1.032, grad_norm=102.143, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.454e-05, train_time=2.777
+[gpub005:0/64] 2023-07-08 03:19:27,571 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub005:0/64] 2023-07-08 03:19:45,853 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 03:19:49,250 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-08 03:19:49,250 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub005:0/64] 2023-07-08 03:19:49,256 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 03:25:36,249 (trainer:732) INFO: 24epoch:train:5001-5100batch: iter_time=1.261, forward_time=0.144, loss_ctc=65.391, loss_att=54.498, acc=0.700, loss=57.766, backward_time=1.048, grad_norm=94.745, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.453e-05, train_time=7.403
+[gpub005:0/64] 2023-07-08 03:27:52,012 (trainer:732) INFO: 24epoch:train:5101-5200batch: iter_time=1.034e-04, forward_time=0.145, loss_ctc=72.872, loss_att=53.944, acc=0.711, loss=59.622, backward_time=1.026, grad_norm=120.048, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.451e-05, train_time=2.715
+[gpub005:0/64] 2023-07-08 03:30:07,787 (trainer:732) INFO: 24epoch:train:5201-5300batch: iter_time=9.830e-05, forward_time=0.145, loss_ctc=78.638, loss_att=59.184, acc=0.712, loss=65.020, backward_time=1.027, grad_norm=92.094, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.449e-05, train_time=2.715
+[gpub005:0/64] 2023-07-08 03:32:25,756 (trainer:732) INFO: 24epoch:train:5301-5400batch: iter_time=9.984e-05, forward_time=0.145, loss_ctc=74.505, loss_att=60.142, acc=0.682, loss=64.451, backward_time=1.044, grad_norm=103.493, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.448e-05, train_time=2.759
+[gpub005:0/64] 2023-07-08 03:34:42,021 (trainer:732) INFO: 24epoch:train:5401-5500batch: iter_time=9.984e-05, forward_time=0.145, loss_ctc=71.932, loss_att=56.685, acc=0.695, loss=61.259, backward_time=1.031, grad_norm=90.686, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.446e-05, train_time=2.725
+[gpub005:0/64] 2023-07-08 03:36:58,579 (trainer:732) INFO: 24epoch:train:5501-5600batch: iter_time=1.076e-04, forward_time=0.146, loss_ctc=73.486, loss_att=54.717, acc=0.701, loss=60.348, backward_time=1.032, grad_norm=106.312, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.444e-05, train_time=2.731
+[gpub005:0/64] 2023-07-08 03:39:14,325 (trainer:732) INFO: 24epoch:train:5601-5700batch: iter_time=9.864e-05, forward_time=0.145, loss_ctc=77.877, loss_att=59.762, acc=0.687, loss=65.197, backward_time=1.026, grad_norm=109.617, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.443e-05, train_time=2.715
+[gpub005:0/64] 2023-07-08 03:41:30,723 (trainer:732) INFO: 24epoch:train:5701-5800batch: iter_time=1.053e-04, forward_time=0.144, loss_ctc=65.162, loss_att=50.988, acc=0.716, loss=55.240, backward_time=1.032, grad_norm=98.285, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.441e-05, train_time=2.728
+[gpub005:0/64] 2023-07-08 03:42:16,993 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub005:0/64] 2023-07-08 03:42:34,998 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 03:42:38,488 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-08 03:42:38,488 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub005:0/64] 2023-07-08 03:42:38,494 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 03:47:54,199 (trainer:732) INFO: 24epoch:train:5801-5900batch: iter_time=1.236, forward_time=0.154, loss_ctc=69.112, loss_att=56.695, acc=0.701, loss=60.420, backward_time=1.042, grad_norm=120.681, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.439e-05, train_time=7.669
+[gpub005:0/64] 2023-07-08 03:50:22,625 (trainer:732) INFO: 24epoch:train:5901-6000batch: iter_time=1.186e-04, forward_time=0.144, loss_ctc=71.865, loss_att=55.398, acc=0.711, loss=60.338, backward_time=1.043, grad_norm=110.200, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.438e-05, train_time=2.968
+[gpub005:0/64] 2023-07-08 03:52:49,847 (trainer:732) INFO: 24epoch:train:6001-6100batch: iter_time=1.171e-04, forward_time=0.145, loss_ctc=76.266, loss_att=55.052, acc=0.717, loss=61.416, backward_time=1.051, grad_norm=98.498, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.436e-05, train_time=2.944
+[gpub005:0/64] 2023-07-08 03:55:19,854 (trainer:732) INFO: 24epoch:train:6101-6200batch: iter_time=1.249e-04, forward_time=0.156, loss_ctc=70.751, loss_att=53.614, acc=0.707, loss=58.755, backward_time=1.049, grad_norm=103.884, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.435e-05, train_time=3.000
+[gpub005:0/64] 2023-07-08 03:57:49,296 (trainer:732) INFO: 24epoch:train:6201-6300batch: iter_time=1.229e-04, forward_time=0.148, loss_ctc=76.058, loss_att=64.659, acc=0.695, loss=68.078, backward_time=1.065, grad_norm=100.653, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.184, optim0_lr0=7.433e-05, train_time=2.989
+[gpub005:0/64] 2023-07-08 04:00:09,479 (trainer:732) INFO: 24epoch:train:6301-6400batch: iter_time=1.223e-04, forward_time=0.145, loss_ctc=74.958, loss_att=54.669, acc=0.705, loss=60.756, backward_time=1.031, grad_norm=94.045, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.431e-05, train_time=2.803
+[gpub005:0/64] 2023-07-08 04:02:32,143 (trainer:732) INFO: 24epoch:train:6401-6500batch: iter_time=1.264e-04, forward_time=0.145, loss_ctc=78.123, loss_att=58.724, acc=0.701, loss=64.544, backward_time=1.040, grad_norm=109.338, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.430e-05, train_time=2.853
+[gpub005:0/64] 2023-07-08 04:04:51,610 (trainer:732) INFO: 24epoch:train:6501-6600batch: iter_time=1.316e-04, forward_time=0.145, loss_ctc=66.708, loss_att=54.417, acc=0.704, loss=58.104, backward_time=1.032, grad_norm=102.988, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.428e-05, train_time=2.789
+[gpub005:0/64] 2023-07-08 04:06:26,443 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub005:0/64] 2023-07-08 04:06:44,558 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 04:06:47,998 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-08 04:06:47,998 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpub005:0/64] 2023-07-08 04:06:48,004 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 04:09:57,980 (trainer:732) INFO: 24epoch:train:6601-6700batch: iter_time=1.411, forward_time=0.145, loss_ctc=70.865, loss_att=57.768, acc=0.718, loss=61.697, backward_time=1.040, grad_norm=96.931, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.426e-05, train_time=6.127
+[gpub005:0/64] 2023-07-08 04:12:15,222 (trainer:732) INFO: 24epoch:train:6701-6800batch: iter_time=1.273e-04, forward_time=0.145, loss_ctc=67.487, loss_att=52.036, acc=0.707, loss=56.671, backward_time=1.034, grad_norm=100.553, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.425e-05, train_time=2.745
+[gpub005:0/64] 2023-07-08 04:14:30,629 (trainer:732) INFO: 24epoch:train:6801-6900batch: iter_time=1.276e-04, forward_time=0.144, loss_ctc=71.361, loss_att=50.340, acc=0.711, loss=56.646, backward_time=1.025, grad_norm=111.542, clip=100.000, loss_scale=4.722e+21,
optim_step_time=0.183, optim0_lr0=7.423e-05, train_time=2.708 +[gpub005:0/64] 2023-07-08 04:16:46,581 (trainer:732) INFO: 24epoch:train:6901-7000batch: iter_time=1.255e-04, forward_time=0.145, loss_ctc=77.871, loss_att=57.029, acc=0.711, loss=63.282, backward_time=1.029, grad_norm=107.653, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.421e-05, train_time=2.719 +[gpub005:0/64] 2023-07-08 04:19:02,671 (trainer:732) INFO: 24epoch:train:7001-7100batch: iter_time=1.217e-04, forward_time=0.146, loss_ctc=76.050, loss_att=64.346, acc=0.697, loss=67.857, backward_time=1.029, grad_norm=91.851, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.420e-05, train_time=2.722 +[gpub005:0/64] 2023-07-08 04:21:18,512 (trainer:732) INFO: 24epoch:train:7101-7200batch: iter_time=1.030e-04, forward_time=0.145, loss_ctc=70.235, loss_att=51.323, acc=0.707, loss=56.997, backward_time=1.029, grad_norm=95.632, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.418e-05, train_time=2.717 +[gpub005:0/64] 2023-07-08 04:23:34,926 (trainer:732) INFO: 24epoch:train:7201-7300batch: iter_time=1.009e-04, forward_time=0.146, loss_ctc=74.110, loss_att=59.073, acc=0.684, loss=63.584, backward_time=1.033, grad_norm=116.090, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.417e-05, train_time=2.728 +[gpub005:0/64] 2023-07-08 04:25:50,893 (trainer:732) INFO: 24epoch:train:7301-7400batch: iter_time=1.053e-04, forward_time=0.145, loss_ctc=71.461, loss_att=54.905, acc=0.707, loss=59.872, backward_time=1.028, grad_norm=89.587, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.415e-05, train_time=2.719 +[gpub005:0/64] 2023-07-08 04:28:06,592 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub005:0/64] 2023-07-08 04:28:24,838 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 04:28:28,253 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 04:28:28,253 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub005:0/64] 2023-07-08 04:28:28,259 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 04:31:58,721 (trainer:732) INFO: 24epoch:train:7401-7500batch: iter_time=1.248, forward_time=0.157, loss_ctc=70.151, loss_att=54.443, acc=0.701, loss=59.156, backward_time=1.035, grad_norm=102.377, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.413e-05, train_time=7.356 +[gpub005:0/64] 2023-07-08 04:34:16,951 (trainer:732) INFO: 24epoch:train:7501-7600batch: iter_time=1.136e-04, forward_time=0.146, loss_ctc=64.218, loss_att=51.088, acc=0.702, loss=55.027, backward_time=1.035, grad_norm=94.349, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.412e-05, train_time=2.765 +[gpub005:0/64] 2023-07-08 04:36:33,188 (trainer:732) INFO: 24epoch:train:7601-7700batch: iter_time=1.163e-04, forward_time=0.145, loss_ctc=72.839, loss_att=52.242, acc=0.715, loss=58.421, backward_time=1.027, grad_norm=100.137, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.410e-05, train_time=2.725 +[gpub005:0/64] 2023-07-08 04:38:49,029 (trainer:732) INFO: 24epoch:train:7701-7800batch: iter_time=1.131e-04, forward_time=0.146, loss_ctc=77.897, loss_att=56.319, acc=0.710, loss=62.792, backward_time=1.029, grad_norm=96.600, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.408e-05, train_time=2.717 +[gpub005:0/64] 2023-07-08 04:41:04,866 (trainer:732) INFO: 24epoch:train:7801-7900batch: iter_time=1.253e-04, forward_time=0.145, loss_ctc=75.016, loss_att=64.152, acc=0.690, loss=67.411, backward_time=1.027, grad_norm=99.392, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.407e-05, train_time=2.717 +[gpub005:0/64] 2023-07-08 04:43:21,052 (trainer:732) INFO: 24epoch:train:7901-8000batch: iter_time=1.166e-04, forward_time=0.146, loss_ctc=69.861, loss_att=50.596, acc=0.706, loss=56.375, backward_time=1.031, grad_norm=85.637, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.405e-05, train_time=2.723 +[gpub005:0/64] 2023-07-08 04:45:36,903 (trainer:732) INFO: 24epoch:train:8001-8100batch: iter_time=1.105e-04, forward_time=0.146, loss_ctc=75.733, loss_att=59.098, acc=0.684, loss=64.089, backward_time=1.028, grad_norm=96.265, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.404e-05, train_time=2.717 +[gpub005:0/64] 2023-07-08 04:47:52,361 (trainer:732) INFO: 24epoch:train:8101-8200batch: iter_time=1.127e-04, forward_time=0.145, loss_ctc=71.279, loss_att=55.254, acc=0.703, loss=60.061, backward_time=1.025, grad_norm=104.118, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, 
optim0_lr0=7.402e-05, train_time=2.709 +[gpub005:0/64] 2023-07-08 04:50:08,165 (trainer:732) INFO: 24epoch:train:8201-8300batch: iter_time=1.132e-04, forward_time=0.145, loss_ctc=66.791, loss_att=54.537, acc=0.708, loss=58.213, backward_time=1.027, grad_norm=107.484, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.400e-05, train_time=2.716 +[gpub005:0/64] 2023-07-08 04:51:06,107 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub005:0/64] 2023-07-08 04:51:24,031 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 04:51:27,758 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 04:51:27,758 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub005:0/64] 2023-07-08 04:51:27,764 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 04:56:22,745 (trainer:732) INFO: 24epoch:train:8301-8400batch: iter_time=2.278, forward_time=0.194, loss_ctc=63.795, loss_att=46.168, acc=0.707, loss=51.456, backward_time=1.048, grad_norm=100.525, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.185, optim0_lr0=7.399e-05, train_time=7.491 +[gpub005:0/64] 2023-07-08 04:58:39,347 (trainer:732) INFO: 24epoch:train:8401-8500batch: iter_time=1.108e-04, forward_time=0.146, loss_ctc=70.673, loss_att=54.531, acc=0.718, loss=59.373, backward_time=1.027, grad_norm=101.049, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.397e-05, train_time=2.732 +[gpub005:0/64] 2023-07-08 05:00:56,190 (trainer:732) INFO: 24epoch:train:8501-8600batch: iter_time=1.362e-04, forward_time=0.146, loss_ctc=81.170, loss_att=59.636, acc=0.713, loss=66.096, backward_time=1.033, grad_norm=102.757, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.395e-05, train_time=2.737 +[gpub005:0/64] 2023-07-08 05:03:12,609 (trainer:732) INFO: 24epoch:train:8601-8700batch: iter_time=1.274e-04, forward_time=0.147, loss_ctc=70.543, loss_att=53.905, acc=0.698, loss=58.897, backward_time=1.033, grad_norm=98.360, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.394e-05, train_time=2.728 +[gpub005:0/64] 2023-07-08 05:05:28,730 (trainer:732) INFO: 24epoch:train:8701-8800batch: iter_time=1.292e-04, forward_time=0.147, loss_ctc=69.057, loss_att=58.090, acc=0.709, loss=61.380, backward_time=1.030, grad_norm=125.170, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.392e-05, train_time=2.722 +[gpub005:0/64] 2023-07-08 05:07:44,575 (trainer:732) INFO: 24epoch:train:8801-8900batch: iter_time=1.295e-04, forward_time=0.145, loss_ctc=77.481, loss_att=55.840, acc=0.710, loss=62.332, backward_time=1.029, grad_norm=96.606, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.391e-05, train_time=2.717 +[gpub005:0/64] 2023-07-08 05:10:00,543 (trainer:732) INFO: 24epoch:train:8901-9000batch: iter_time=1.249e-04, forward_time=0.146, loss_ctc=77.860, 
loss_att=60.412, acc=0.702, loss=65.647, backward_time=1.029, grad_norm=107.480, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.389e-05, train_time=2.719 +[gpub005:0/64] 2023-07-08 05:12:16,943 (trainer:732) INFO: 24epoch:train:9001-9100batch: iter_time=1.265e-04, forward_time=0.147, loss_ctc=64.228, loss_att=51.265, acc=0.713, loss=55.154, backward_time=1.032, grad_norm=103.336, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.387e-05, train_time=2.728 +[gpub005:0/64] 2023-07-08 05:13:50,730 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub005:0/64] 2023-07-08 05:14:09,041 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 05:14:12,445 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 05:14:12,446 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub005:0/64] 2023-07-08 05:14:12,452 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 05:18:16,694 (trainer:732) INFO: 24epoch:train:9101-9200batch: iter_time=1.278, forward_time=0.147, loss_ctc=69.239, loss_att=55.541, acc=0.700, loss=59.650, backward_time=1.040, grad_norm=102.258, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.386e-05, train_time=7.195 +[gpub005:0/64] 2023-07-08 05:20:33,672 (trainer:732) INFO: 24epoch:train:9201-9300batch: iter_time=1.200e-04, forward_time=0.147, loss_ctc=65.579, loss_att=50.029, acc=0.720, loss=54.694, backward_time=1.032, grad_norm=84.538, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.384e-05, train_time=2.739 +[gpub005:0/64] 2023-07-08 05:22:50,622 (trainer:732) INFO: 24epoch:train:9301-9400batch: iter_time=1.139e-04, forward_time=0.146, loss_ctc=70.083, loss_att=49.325, acc=0.718, loss=55.553, backward_time=1.030, grad_norm=91.235, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.382e-05, train_time=2.739 +[gpub005:0/64] 2023-07-08 05:25:06,734 (trainer:732) INFO: 24epoch:train:9401-9500batch: iter_time=1.142e-04, forward_time=0.146, loss_ctc=77.209, loss_att=57.645, acc=0.710, loss=63.514, backward_time=1.026, grad_norm=95.607, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.182, optim0_lr0=7.381e-05, train_time=2.722 +[gpub005:0/64] 2023-07-08 05:27:22,855 (trainer:732) INFO: 24epoch:train:9501-9600batch: iter_time=1.148e-04, forward_time=0.146, loss_ctc=75.606, loss_att=64.748, acc=0.703, loss=68.006, backward_time=1.030, grad_norm=94.541, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.379e-05, train_time=2.722 +[gpub005:0/64] 2023-07-08 05:29:38,842 (trainer:732) INFO: 24epoch:train:9601-9700batch: iter_time=1.074e-04, forward_time=0.147, loss_ctc=70.548, loss_att=50.666, acc=0.716, loss=56.631, backward_time=1.030, grad_norm=106.879, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.378e-05, train_time=2.720 +[gpub005:0/64] 2023-07-08 
05:31:55,248 (trainer:732) INFO: 24epoch:train:9701-9800batch: iter_time=1.109e-04, forward_time=0.146, loss_ctc=76.421, loss_att=59.658, acc=0.695, loss=64.687, backward_time=1.032, grad_norm=108.855, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.376e-05, train_time=2.728 +[gpub005:0/64] 2023-07-08 05:34:11,300 (trainer:732) INFO: 24epoch:train:9801-9900batch: iter_time=1.108e-04, forward_time=0.145, loss_ctc=69.905, loss_att=51.680, acc=0.718, loss=57.148, backward_time=1.032, grad_norm=97.751, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.374e-05, train_time=2.721 +[gpub005:0/64] 2023-07-08 05:36:27,320 (trainer:732) INFO: 24epoch:train:9901-10000batch: iter_time=1.145e-04, forward_time=0.146, loss_ctc=71.831, loss_att=57.034, acc=0.706, loss=61.473, backward_time=1.031, grad_norm=88.956, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.373e-05, train_time=2.720 +[gpub005:0/64] 2023-07-08 05:49:37,783 (trainer:338) INFO: 24epoch results: [train] iter_time=0.180, forward_time=0.148, loss_ctc=72.844, loss_att=56.139, acc=0.703, loss=61.150, backward_time=1.032, grad_norm=101.161, clip=100.000, loss_scale=4.722e+21, optim_step_time=0.183, optim0_lr0=7.454e-05, train_time=3.286, time=4 hours, 34 minutes and 8.11 seconds, total_count=210000, gpu_max_cached_mem_GB=38.229, [valid] loss_ctc=49.172, cer_ctc=0.283, loss_att=39.899, acc=0.671, cer=0.378, wer=0.988, loss=42.681, time=6 minutes and 54.41 seconds, total_count=21758, gpu_max_cached_mem_GB=38.229, [att_plot] time=5 minutes and 56.93 seconds, total_count=0, gpu_max_cached_mem_GB=38.229 +[gpub005:0/64] 2023-07-08 05:49:53,063 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpub005:0/64] 2023-07-08 05:49:53,184 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/17epoch.pth, exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/19epoch.pth +[gpub005:0/64] 2023-07-08 05:49:53,184 (trainer:272) INFO: 25/30epoch started. Estimated time to finish: 1 day, 5 hours and 1 minute +[gpub005:0/64] 2023-07-08 05:49:53,188 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub005:0/64] 2023-07-08 05:50:11,340 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 05:50:15,036 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 05:50:15,036 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub005:0/64] 2023-07-08 05:50:15,043 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 05:54:20,277 (trainer:732) INFO: 25epoch:train:1-100batch: iter_time=1.258, forward_time=0.155, loss_ctc=67.394, loss_att=55.790, acc=0.669, loss=59.271, backward_time=1.041, grad_norm=94.745, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.184, optim0_lr0=7.371e-05, train_time=5.341 +[gpub005:0/64] 2023-07-08 05:56:37,742 (trainer:732) INFO: 25epoch:train:101-200batch: iter_time=1.387e-04, forward_time=0.146, loss_ctc=84.308, loss_att=59.589, acc=0.684, loss=67.005, backward_time=1.031, grad_norm=111.641, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.370e-05, train_time=2.749 +[gpub005:0/64] 2023-07-08 05:59:04,212 (trainer:732) INFO: 25epoch:train:201-300batch: iter_time=1.391e-04, forward_time=0.147, loss_ctc=78.994, loss_att=61.853, acc=0.673, loss=66.996, backward_time=1.039, grad_norm=104.903, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.368e-05, train_time=2.929 +[gpub005:0/64] 2023-07-08 06:01:23,041 (trainer:732) INFO: 25epoch:train:301-400batch: iter_time=1.347e-04, forward_time=0.151, loss_ctc=70.404, loss_att=51.376, acc=0.693, loss=57.084, backward_time=1.027, grad_norm=102.277, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.366e-05, train_time=2.776 +[gpub005:0/64] 2023-07-08 06:03:45,683 (trainer:732) INFO: 25epoch:train:401-500batch: iter_time=1.110e-04, forward_time=0.154, loss_ctc=71.110, loss_att=53.943, acc=0.687, loss=59.093, backward_time=1.036, grad_norm=110.916, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.184, optim0_lr0=7.365e-05, train_time=2.853 +[gpub005:0/64] 2023-07-08 06:06:01,868 (trainer:732) INFO: 25epoch:train:501-600batch: iter_time=1.154e-04, forward_time=0.145, loss_ctc=76.464, loss_att=60.641, acc=0.668, loss=65.388, backward_time=1.027, grad_norm=119.647, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.184, optim0_lr0=7.363e-05, train_time=2.723 +[gpub005:0/64] 2023-07-08 06:08:24,461 (trainer:732) INFO: 25epoch:train:601-700batch: iter_time=1.100e-04, forward_time=0.160, loss_ctc=73.685, loss_att=54.373, acc=0.707, loss=60.166, backward_time=1.038, grad_norm=117.803, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.362e-05, train_time=2.851 +[gpub005:0/64] 2023-07-08 06:10:47,886 (trainer:732) INFO: 25epoch:train:701-800batch: iter_time=1.234e-04, forward_time=0.153, loss_ctc=73.438, loss_att=57.055, acc=0.681, loss=61.970, backward_time=1.037, grad_norm=102.935, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.360e-05, 
train_time=2.869 +[gpub005:0/64] 2023-07-08 06:11:41,403 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub005:0/64] 2023-07-08 06:11:59,269 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 06:12:02,898 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 06:12:02,898 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub005:0/64] 2023-07-08 06:12:02,905 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 06:16:41,012 (trainer:732) INFO: 25epoch:train:801-900batch: iter_time=1.458, forward_time=0.191, loss_ctc=67.076, loss_att=57.674, acc=0.679, loss=60.495, backward_time=1.042, grad_norm=97.265, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.185, optim0_lr0=7.358e-05, train_time=7.062 +[gpub005:0/64] 2023-07-08 06:18:59,652 (trainer:732) INFO: 25epoch:train:901-1000batch: iter_time=1.225e-04, forward_time=0.147, loss_ctc=80.822, loss_att=56.630, acc=0.693, loss=63.887, backward_time=1.033, grad_norm=105.567, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.357e-05, train_time=2.773 +[gpub005:0/64] 2023-07-08 06:21:15,828 (trainer:732) INFO: 25epoch:train:1001-1100batch: iter_time=1.175e-04, forward_time=0.147, loss_ctc=80.627, loss_att=60.328, acc=0.686, loss=66.417, backward_time=1.029, grad_norm=110.847, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.355e-05, train_time=2.723 +[gpub005:0/64] 2023-07-08 06:23:34,704 (trainer:732) INFO: 25epoch:train:1101-1200batch: iter_time=1.277e-04, forward_time=0.147, loss_ctc=68.462, loss_att=49.783, acc=0.699, loss=55.387, backward_time=1.038, grad_norm=96.138, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.354e-05, train_time=2.777 +[gpub005:0/64] 2023-07-08 06:25:51,451 (trainer:732) INFO: 25epoch:train:1201-1300batch: iter_time=1.223e-04, forward_time=0.147, loss_ctc=68.844, loss_att=51.168, acc=0.687, loss=56.471, backward_time=1.030, grad_norm=91.064, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.352e-05, train_time=2.735 +[gpub005:0/64] 2023-07-08 06:28:09,593 (trainer:732) INFO: 25epoch:train:1301-1400batch: iter_time=1.119e-04, forward_time=0.147, loss_ctc=77.293, loss_att=63.126, acc=0.669, loss=67.376, backward_time=1.029, grad_norm=94.136, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.351e-05, train_time=2.763 +[gpub005:0/64] 2023-07-08 06:30:27,572 (trainer:732) INFO: 25epoch:train:1401-1500batch: iter_time=1.204e-04, forward_time=0.146, loss_ctc=72.320, loss_att=52.628, acc=0.702, loss=58.535, backward_time=1.031, grad_norm=107.375, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.349e-05, train_time=2.759 +[gpub005:0/64] 2023-07-08 06:32:43,510 (trainer:732) INFO: 25epoch:train:1501-1600batch: iter_time=1.230e-04, forward_time=0.146, loss_ctc=72.172, loss_att=55.048, acc=0.694, 
loss=60.185, backward_time=1.030, grad_norm=114.079, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.347e-05, train_time=2.719 +[gpub005:0/64] 2023-07-08 06:34:30,871 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub005:0/64] 2023-07-08 06:34:49,193 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 06:34:52,675 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 06:34:52,675 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub005:0/64] 2023-07-08 06:34:52,681 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 06:39:03,625 (trainer:732) INFO: 25epoch:train:1601-1700batch: iter_time=2.312, forward_time=0.145, loss_ctc=62.890, loss_att=49.929, acc=0.685, loss=53.817, backward_time=1.041, grad_norm=91.507, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.184, optim0_lr0=7.346e-05, train_time=7.602 +[gpub005:0/64] 2023-07-08 06:41:20,097 (trainer:732) INFO: 25epoch:train:1701-1800batch: iter_time=9.406e-05, forward_time=0.144, loss_ctc=73.172, loss_att=63.151, acc=0.679, loss=66.157, backward_time=1.032, grad_norm=103.748, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.344e-05, train_time=2.729 +[gpub005:0/64] 2023-07-08 06:43:36,333 (trainer:732) INFO: 25epoch:train:1801-1900batch: iter_time=9.608e-05, forward_time=0.144, loss_ctc=77.961, loss_att=56.059, acc=0.692, loss=62.629, backward_time=1.030, grad_norm=116.681, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.343e-05, train_time=2.724 +[gpub005:0/64] 2023-07-08 06:45:52,234 (trainer:732) INFO: 25epoch:train:1901-2000batch: iter_time=1.022e-04, forward_time=0.144, loss_ctc=76.589, loss_att=57.774, acc=0.696, loss=63.419, backward_time=1.028, grad_norm=111.423, clip=100.000, loss_scale=9.445e+21, optim_step_time=0.183, optim0_lr0=7.341e-05, train_time=2.718 +[gpub005:0/64] 2023-07-08 06:48:07,914 (trainer:732) INFO: 25epoch:train:2001-2100batch: iter_time=1.006e-04, forward_time=0.145, loss_ctc=70.293, loss_att=53.244, acc=0.693, loss=58.359, backward_time=1.027, grad_norm=96.267, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.339e-05, train_time=2.713 +[gpub005:0/64] 2023-07-08 06:50:23,824 (trainer:732) INFO: 25epoch:train:2101-2200batch: iter_time=1.005e-04, forward_time=0.145, loss_ctc=67.023, loss_att=52.896, acc=0.693, loss=57.134, backward_time=1.028, grad_norm=92.993, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.338e-05, train_time=2.718 +[gpub005:0/64] 2023-07-08 06:52:40,143 (trainer:732) INFO: 25epoch:train:2201-2300batch: iter_time=1.062e-04, forward_time=0.145, loss_ctc=76.121, loss_att=61.259, acc=0.706, loss=65.718, backward_time=1.031, grad_norm=108.003, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.336e-05, train_time=2.726 +[gpub005:0/64] 2023-07-08 06:54:56,170 (trainer:732) 
INFO: 25epoch:train:2301-2400batch: iter_time=1.002e-04, forward_time=0.145, loss_ctc=76.319, loss_att=58.518, acc=0.696, loss=63.859, backward_time=1.030, grad_norm=138.170, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.335e-05, train_time=2.720 +[gpub005:0/64] 2023-07-08 06:57:11,886 (trainer:732) INFO: 25epoch:train:2401-2500batch: iter_time=1.036e-04, forward_time=0.145, loss_ctc=66.720, loss_att=49.422, acc=0.706, loss=54.612, backward_time=1.027, grad_norm=88.293, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.333e-05, train_time=2.714 +[gpub005:0/64] 2023-07-08 06:57:13,253 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub005:0/64] 2023-07-08 06:57:31,639 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 06:57:35,098 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 06:57:35,098 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub005:0/64] 2023-07-08 06:57:35,104 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 07:03:55,517 (trainer:732) INFO: 25epoch:train:2501-2600batch: iter_time=1.269, forward_time=0.168, loss_ctc=71.680, loss_att=61.817, acc=0.673, loss=64.776, backward_time=1.045, grad_norm=123.962, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.184, optim0_lr0=7.332e-05, train_time=8.072 +[gpub005:0/64] 2023-07-08 07:06:11,651 (trainer:732) INFO: 25epoch:train:2601-2700batch: iter_time=1.030e-04, forward_time=0.145, loss_ctc=78.835, loss_att=55.917, acc=0.698, loss=62.792, backward_time=1.028, grad_norm=111.638, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.330e-05, train_time=2.722 +[gpub005:0/64] 2023-07-08 07:08:27,988 (trainer:732) INFO: 25epoch:train:2701-2800batch: iter_time=1.061e-04, forward_time=0.145, loss_ctc=74.238, loss_att=55.257, acc=0.702, loss=60.951, backward_time=1.029, grad_norm=93.733, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.328e-05, train_time=2.727 +[gpub005:0/64] 2023-07-08 07:10:43,669 (trainer:732) INFO: 25epoch:train:2801-2900batch: iter_time=1.025e-04, forward_time=0.145, loss_ctc=69.537, loss_att=53.230, acc=0.695, loss=58.122, backward_time=1.026, grad_norm=92.779, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.184, optim0_lr0=7.327e-05, train_time=2.713 +[gpub005:0/64] 2023-07-08 07:12:59,646 (trainer:732) INFO: 25epoch:train:2901-3000batch: iter_time=1.050e-04, forward_time=0.145, loss_ctc=69.671, loss_att=54.659, acc=0.687, loss=59.162, backward_time=1.030, grad_norm=102.190, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.325e-05, train_time=2.719 +[gpub005:0/64] 2023-07-08 07:15:16,087 (trainer:732) INFO: 25epoch:train:3001-3100batch: iter_time=1.036e-04, forward_time=0.146, loss_ctc=73.047, loss_att=57.793, acc=0.706, loss=62.369, backward_time=1.032, grad_norm=97.007, clip=100.000, 
loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.324e-05, train_time=2.729 +[gpub005:0/64] 2023-07-08 07:17:32,402 (trainer:732) INFO: 25epoch:train:3101-3200batch: iter_time=9.844e-05, forward_time=0.145, loss_ctc=75.818, loss_att=59.026, acc=0.709, loss=64.063, backward_time=1.031, grad_norm=99.614, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.322e-05, train_time=2.726 +[gpub005:0/64] 2023-07-08 07:19:48,026 (trainer:732) INFO: 25epoch:train:3201-3300batch: iter_time=1.126e-04, forward_time=0.145, loss_ctc=65.520, loss_att=44.135, acc=0.710, loss=50.551, backward_time=1.027, grad_norm=101.011, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.321e-05, train_time=2.712 +[gpub005:0/64] 2023-07-08 07:20:36,040 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub005:0/64] 2023-07-08 07:20:54,437 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 07:20:57,907 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 07:20:57,907 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub005:0/64] 2023-07-08 07:20:57,913 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 07:27:17,085 (trainer:732) INFO: 25epoch:train:3301-3400batch: iter_time=1.283, forward_time=0.146, loss_ctc=63.050, loss_att=54.013, acc=0.684, loss=56.724, backward_time=1.043, grad_norm=89.866, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.319e-05, train_time=8.981 +[gpub005:0/64] 2023-07-08 07:29:33,399 (trainer:732) INFO: 25epoch:train:3401-3500batch: iter_time=1.127e-04, forward_time=0.146, loss_ctc=81.752, loss_att=62.301, acc=0.685, loss=68.136, backward_time=1.029, grad_norm=109.748, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.317e-05, train_time=2.726 +[gpub005:0/64] 2023-07-08 07:31:49,292 (trainer:732) INFO: 25epoch:train:3501-3600batch: iter_time=1.184e-04, forward_time=0.147, loss_ctc=74.281, loss_att=56.625, acc=0.693, loss=61.921, backward_time=1.027, grad_norm=105.121, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.316e-05, train_time=2.718 +[gpub005:0/64] 2023-07-08 07:34:05,347 (trainer:732) INFO: 25epoch:train:3601-3700batch: iter_time=1.278e-04, forward_time=0.146, loss_ctc=75.075, loss_att=55.767, acc=0.693, loss=61.559, backward_time=1.030, grad_norm=97.157, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.314e-05, train_time=2.721 +[gpub005:0/64] 2023-07-08 07:36:20,907 (trainer:732) INFO: 25epoch:train:3701-3800batch: iter_time=1.206e-04, forward_time=0.147, loss_ctc=69.361, loss_att=52.975, acc=0.686, loss=57.891, backward_time=1.026, grad_norm=97.077, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.313e-05, train_time=2.711 +[gpub005:0/64] 2023-07-08 07:38:36,925 (trainer:732) INFO: 25epoch:train:3801-3900batch: iter_time=1.209e-04, 
forward_time=0.147, loss_ctc=69.622, loss_att=55.526, acc=0.675, loss=59.755, backward_time=1.030, grad_norm=104.909, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.311e-05, train_time=2.720 +[gpub005:0/64] 2023-07-08 07:40:52,788 (trainer:732) INFO: 25epoch:train:3901-4000batch: iter_time=1.126e-04, forward_time=0.147, loss_ctc=77.126, loss_att=58.450, acc=0.698, loss=64.053, backward_time=1.028, grad_norm=127.297, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.310e-05, train_time=2.717 +[gpub005:0/64] 2023-07-08 07:43:08,833 (trainer:732) INFO: 25epoch:train:4001-4100batch: iter_time=1.138e-04, forward_time=0.147, loss_ctc=70.297, loss_att=52.546, acc=0.700, loss=57.871, backward_time=1.031, grad_norm=108.860, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.308e-05, train_time=2.721 +[gpub005:0/64] 2023-07-08 07:44:41,113 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub005:0/64] 2023-07-08 07:44:59,453 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 07:45:02,893 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 07:45:02,893 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub005:0/64] 2023-07-08 07:45:02,899 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 07:48:00,388 (trainer:732) INFO: 25epoch:train:4101-4200batch: iter_time=1.244, forward_time=0.152, loss_ctc=63.813, loss_att=47.498, acc=0.698, loss=52.392, backward_time=1.044, grad_norm=101.569, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.306e-05, train_time=5.831 +[gpub005:0/64] 2023-07-08 07:50:17,294 (trainer:732) INFO: 25epoch:train:4201-4300batch: iter_time=1.143e-04, forward_time=0.145, loss_ctc=74.377, loss_att=62.988, acc=0.675, loss=66.405, backward_time=1.031, grad_norm=106.091, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.305e-05, train_time=2.738 +[gpub005:0/64] 2023-07-08 07:52:33,320 (trainer:732) INFO: 25epoch:train:4301-4400batch: iter_time=1.159e-04, forward_time=0.145, loss_ctc=78.418, loss_att=54.676, acc=0.694, loss=61.798, backward_time=1.026, grad_norm=105.223, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.303e-05, train_time=2.720 +[gpub005:0/64] 2023-07-08 07:54:49,532 (trainer:732) INFO: 25epoch:train:4401-4500batch: iter_time=1.139e-04, forward_time=0.145, loss_ctc=75.077, loss_att=55.026, acc=0.702, loss=61.042, backward_time=1.033, grad_norm=101.895, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.302e-05, train_time=2.724 +[gpub005:0/64] 2023-07-08 07:57:04,904 (trainer:732) INFO: 25epoch:train:4501-4600batch: iter_time=1.237e-04, forward_time=0.145, loss_ctc=69.245, loss_att=52.258, acc=0.691, loss=57.354, backward_time=1.024, grad_norm=112.451, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.300e-05, 
train_time=2.707 +[gpub005:0/64] 2023-07-08 07:59:23,124 (trainer:732) INFO: 25epoch:train:4601-4700batch: iter_time=1.253e-04, forward_time=0.148, loss_ctc=67.163, loss_att=53.729, acc=0.683, loss=57.759, backward_time=1.031, grad_norm=104.056, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.299e-05, train_time=2.764 +[gpub005:0/64] 2023-07-08 08:01:40,001 (trainer:732) INFO: 25epoch:train:4701-4800batch: iter_time=1.179e-04, forward_time=0.147, loss_ctc=75.490, loss_att=58.879, acc=0.703, loss=63.862, backward_time=1.029, grad_norm=103.480, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.297e-05, train_time=2.737 +[gpub005:0/64] 2023-07-08 08:03:56,194 (trainer:732) INFO: 25epoch:train:4801-4900batch: iter_time=1.366e-04, forward_time=0.146, loss_ctc=74.857, loss_att=56.613, acc=0.698, loss=62.086, backward_time=1.033, grad_norm=105.020, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.296e-05, train_time=2.724 +[gpub005:0/64] 2023-07-08 08:06:11,952 (trainer:732) INFO: 25epoch:train:4901-5000batch: iter_time=1.173e-04, forward_time=0.146, loss_ctc=67.657, loss_att=51.018, acc=0.697, loss=56.010, backward_time=1.027, grad_norm=92.263, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.294e-05, train_time=2.715 +[gpub005:0/64] 2023-07-08 08:06:13,258 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub005:0/64] 2023-07-08 08:06:31,398 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 08:06:34,942 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 08:06:34,942 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub005:0/64] 2023-07-08 08:06:34,948 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 08:11:35,592 (trainer:732) INFO: 25epoch:train:5001-5100batch: iter_time=1.225, forward_time=0.145, loss_ctc=64.552, loss_att=53.371, acc=0.688, loss=56.726, backward_time=1.044, grad_norm=89.113, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.292e-05, train_time=6.473 +[gpub005:0/64] 2023-07-08 08:13:51,847 (trainer:732) INFO: 25epoch:train:5101-5200batch: iter_time=1.180e-04, forward_time=0.146, loss_ctc=79.661, loss_att=57.127, acc=0.702, loss=63.887, backward_time=1.031, grad_norm=107.156, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.291e-05, train_time=2.725 +[gpub005:0/64] 2023-07-08 08:16:08,081 (trainer:732) INFO: 25epoch:train:5201-5300batch: iter_time=1.105e-04, forward_time=0.147, loss_ctc=76.918, loss_att=60.190, acc=0.694, loss=65.209, backward_time=1.031, grad_norm=114.919, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.289e-05, train_time=2.724 +[gpub005:0/64] 2023-07-08 08:18:23,889 (trainer:732) INFO: 25epoch:train:5301-5400batch: iter_time=1.060e-04, forward_time=0.146, loss_ctc=68.186, loss_att=50.614, acc=0.703, 
loss=55.886, backward_time=1.027, grad_norm=98.722, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.288e-05, train_time=2.716 +[gpub005:0/64] 2023-07-08 08:20:39,880 (trainer:732) INFO: 25epoch:train:5401-5500batch: iter_time=1.119e-04, forward_time=0.148, loss_ctc=68.353, loss_att=52.983, acc=0.701, loss=57.594, backward_time=1.028, grad_norm=93.295, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.286e-05, train_time=2.720 +[gpub005:0/64] 2023-07-08 08:22:56,095 (trainer:732) INFO: 25epoch:train:5501-5600batch: iter_time=1.114e-04, forward_time=0.147, loss_ctc=72.638, loss_att=57.921, acc=0.694, loss=62.336, backward_time=1.032, grad_norm=118.868, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.285e-05, train_time=2.724 +[gpub005:0/64] 2023-07-08 08:25:12,299 (trainer:732) INFO: 25epoch:train:5601-5700batch: iter_time=1.169e-04, forward_time=0.146, loss_ctc=72.064, loss_att=54.239, acc=0.715, loss=59.587, backward_time=1.032, grad_norm=100.871, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.283e-05, train_time=2.724 +[gpub005:0/64] 2023-07-08 08:27:28,098 (trainer:732) INFO: 25epoch:train:5701-5800batch: iter_time=1.043e-04, forward_time=0.146, loss_ctc=70.937, loss_att=53.502, acc=0.705, loss=58.733, backward_time=1.029, grad_norm=105.846, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.282e-05, train_time=2.716 +[gpub005:0/64] 2023-07-08 08:28:14,331 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub005:0/64] 2023-07-08 08:28:32,548 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 08:28:36,003 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 08:28:36,003 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub005:0/64] 2023-07-08 08:28:36,010 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 08:32:06,642 (trainer:732) INFO: 25epoch:train:5801-5900batch: iter_time=1.229, forward_time=0.168, loss_ctc=61.899, loss_att=49.951, acc=0.692, loss=53.536, backward_time=1.043, grad_norm=114.240, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.184, optim0_lr0=7.280e-05, train_time=5.570 +[gpub005:0/64] 2023-07-08 08:34:23,716 (trainer:732) INFO: 25epoch:train:5901-6000batch: iter_time=1.036e-04, forward_time=0.146, loss_ctc=80.144, loss_att=59.966, acc=0.694, loss=66.019, backward_time=1.033, grad_norm=117.796, clip=100.000, loss_scale=1.889e+22, optim_step_time=0.183, optim0_lr0=7.279e-05, train_time=2.741 +[gpub005:0/64] 2023-07-08 08:36:39,086 (trainer:732) INFO: 25epoch:train:6001-6100batch: iter_time=1.081e-04, forward_time=0.145, loss_ctc=74.687, loss_att=55.806, acc=0.699, loss=61.470, backward_time=1.025, grad_norm=102.244, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.277e-05, train_time=2.707 +[gpub005:0/64] 2023-07-08 08:38:54,592 (trainer:732) 
INFO: 25epoch:train:6101-6200batch: iter_time=1.234e-04, forward_time=0.146, loss_ctc=74.301, loss_att=54.846, acc=0.696, loss=60.682, backward_time=1.026, grad_norm=93.497, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.275e-05, train_time=2.710 +[gpub005:0/64] 2023-07-08 08:41:10,050 (trainer:732) INFO: 25epoch:train:6201-6300batch: iter_time=1.090e-04, forward_time=0.145, loss_ctc=69.302, loss_att=51.670, acc=0.693, loss=56.959, backward_time=1.027, grad_norm=100.115, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.274e-05, train_time=2.709 +[gpub005:0/64] 2023-07-08 08:43:26,047 (trainer:732) INFO: 25epoch:train:6301-6400batch: iter_time=1.119e-04, forward_time=0.145, loss_ctc=70.805, loss_att=56.455, acc=0.675, loss=60.760, backward_time=1.029, grad_norm=84.726, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.272e-05, train_time=2.720 +[gpub005:0/64] 2023-07-08 08:45:41,895 (trainer:732) INFO: 25epoch:train:6401-6500batch: iter_time=1.251e-04, forward_time=0.144, loss_ctc=75.784, loss_att=56.769, acc=0.708, loss=62.473, backward_time=1.028, grad_norm=113.466, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.271e-05, train_time=2.717 +[gpub005:0/64] 2023-07-08 08:47:57,577 (trainer:732) INFO: 25epoch:train:6501-6600batch: iter_time=1.128e-04, forward_time=0.145, loss_ctc=69.709, loss_att=52.710, acc=0.701, loss=57.809, backward_time=1.027, grad_norm=106.183, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.269e-05, train_time=2.713 +[gpub005:0/64] 2023-07-08 08:49:31,994 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub005:0/64] 2023-07-08 08:49:50,303 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 08:49:53,685 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 08:49:53,685 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub005:0/64] 2023-07-08 08:49:53,692 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 08:53:40,732 (trainer:732) INFO: 25epoch:train:6601-6700batch: iter_time=1.490, forward_time=0.185, loss_ctc=61.334, loss_att=48.253, acc=0.699, loss=52.177, backward_time=1.043, grad_norm=98.206, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.185, optim0_lr0=7.268e-05, train_time=6.863 +[gpub005:0/64] 2023-07-08 08:55:57,864 (trainer:732) INFO: 25epoch:train:6701-6800batch: iter_time=1.189e-04, forward_time=0.147, loss_ctc=76.795, loss_att=61.313, acc=0.677, loss=65.958, backward_time=1.034, grad_norm=106.691, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.266e-05, train_time=2.742 +[gpub005:0/64] 2023-07-08 08:58:14,559 (trainer:732) INFO: 25epoch:train:6801-6900batch: iter_time=1.189e-04, forward_time=0.145, loss_ctc=76.474, loss_att=54.969, acc=0.701, loss=61.420, backward_time=1.027, grad_norm=92.293, clip=100.000, 
loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.265e-05, train_time=2.734 +[gpub005:0/64] 2023-07-08 09:00:30,838 (trainer:732) INFO: 25epoch:train:6901-7000batch: iter_time=1.137e-04, forward_time=0.146, loss_ctc=74.051, loss_att=55.688, acc=0.700, loss=61.197, backward_time=1.029, grad_norm=124.427, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.263e-05, train_time=2.725 +[gpub005:0/64] 2023-07-08 09:02:46,205 (trainer:732) INFO: 25epoch:train:7001-7100batch: iter_time=1.143e-04, forward_time=0.145, loss_ctc=69.640, loss_att=51.459, acc=0.684, loss=56.913, backward_time=1.025, grad_norm=101.859, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.262e-05, train_time=2.707 +[gpub005:0/64] 2023-07-08 09:05:02,287 (trainer:732) INFO: 25epoch:train:7101-7200batch: iter_time=1.126e-04, forward_time=0.145, loss_ctc=67.988, loss_att=53.614, acc=0.682, loss=57.926, backward_time=1.030, grad_norm=98.586, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.260e-05, train_time=2.721 +[gpub005:0/64] 2023-07-08 09:07:18,114 (trainer:732) INFO: 25epoch:train:7201-7300batch: iter_time=1.140e-04, forward_time=0.146, loss_ctc=78.895, loss_att=59.924, acc=0.702, loss=65.615, backward_time=1.029, grad_norm=107.921, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.259e-05, train_time=2.716 +[gpub005:0/64] 2023-07-08 09:09:34,216 (trainer:732) INFO: 25epoch:train:7301-7400batch: iter_time=1.041e-04, forward_time=0.147, loss_ctc=68.386, loss_att=52.947, acc=0.699, loss=57.579, backward_time=1.029, grad_norm=100.878, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.257e-05, train_time=2.722 +[gpub005:0/64] 2023-07-08 09:11:57,640 (trainer:732) INFO: 25epoch:train:7401-7500batch: iter_time=1.042e-04, forward_time=0.147, loss_ctc=65.301, loss_att=50.575, acc=0.707, loss=54.993, backward_time=1.045, grad_norm=94.219, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.255e-05, train_time=2.868 +[gpub005:0/64] 2023-07-08 09:11:59,129 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub005:0/64] 2023-07-08 09:12:17,091 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 09:12:20,533 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 09:12:20,534 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub005:0/64] 2023-07-08 09:12:20,540 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 09:19:22,003 (trainer:732) INFO: 25epoch:train:7501-7600batch: iter_time=1.244, forward_time=0.146, loss_ctc=70.509, loss_att=61.157, acc=0.678, loss=63.963, backward_time=1.041, grad_norm=134.173, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.254e-05, train_time=8.887 +[gpub005:0/64] 2023-07-08 09:21:38,576 (trainer:732) INFO: 25epoch:train:7601-7700batch: iter_time=1.115e-04, forward_time=0.146, loss_ctc=78.059, loss_att=56.103, acc=0.702, loss=62.690, backward_time=1.028, grad_norm=103.019, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.252e-05, train_time=2.731 +[gpub005:0/64] 2023-07-08 09:23:54,909 (trainer:732) INFO: 25epoch:train:7701-7800batch: iter_time=1.239e-04, forward_time=0.148, loss_ctc=72.875, loss_att=55.188, acc=0.704, loss=60.494, backward_time=1.028, grad_norm=104.478, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.251e-05, train_time=2.726 +[gpub005:0/64] 2023-07-08 09:26:13,203 (trainer:732) INFO: 25epoch:train:7801-7900batch: iter_time=1.334e-04, forward_time=0.145, loss_ctc=69.878, loss_att=53.424, acc=0.699, loss=58.360, backward_time=1.025, grad_norm=132.395, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.249e-05, train_time=2.766 +[gpub005:0/64] 2023-07-08 09:28:29,065 (trainer:732) INFO: 25epoch:train:7901-8000batch: iter_time=1.246e-04, forward_time=0.147, loss_ctc=66.722, loss_att=53.254, acc=0.693, loss=57.294, backward_time=1.026, grad_norm=96.573, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.248e-05, train_time=2.717 +[gpub005:0/64] 2023-07-08 09:30:45,214 (trainer:732) INFO: 25epoch:train:8001-8100batch: iter_time=1.310e-04, forward_time=0.147, loss_ctc=72.346, loss_att=57.409, acc=0.709, loss=61.890, backward_time=1.028, grad_norm=100.161, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.246e-05, train_time=2.723 +[gpub005:0/64] 2023-07-08 09:33:01,171 (trainer:732) INFO: 25epoch:train:8101-8200batch: iter_time=1.067e-04, forward_time=0.147, loss_ctc=75.162, loss_att=59.259, acc=0.708, loss=64.030, backward_time=1.029, grad_norm=104.558, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.245e-05, train_time=2.719 +[gpub005:0/64] 2023-07-08 09:35:16,902 (trainer:732) INFO: 25epoch:train:8201-8300batch: iter_time=1.014e-04, forward_time=0.146, loss_ctc=63.470, loss_att=43.141, acc=0.713, loss=49.240, backward_time=1.028, grad_norm=88.867, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, 
optim0_lr0=7.243e-05, train_time=2.714
+[gpub005:0/64] 2023-07-08 09:36:06,322 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub005:0/64] 2023-07-08 09:36:24,330 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 09:36:28,070 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 09:36:28,070 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub005:0/64] 2023-07-08 09:36:28,077 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 09:41:52,004 (trainer:732) INFO: 25epoch:train:8301-8400batch: iter_time=1.423, forward_time=0.186, loss_ctc=61.528, loss_att=54.110, acc=0.687, loss=56.335, backward_time=1.049, grad_norm=94.433, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.184, optim0_lr0=7.242e-05, train_time=7.902
+[gpub005:0/64] 2023-07-08 09:44:08,921 (trainer:732) INFO: 25epoch:train:8401-8500batch: iter_time=1.226e-04, forward_time=0.147, loss_ctc=80.713, loss_att=60.130, acc=0.699, loss=66.305, backward_time=1.028, grad_norm=101.118, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.240e-05, train_time=2.738
+[gpub005:0/64] 2023-07-08 09:46:25,326 (trainer:732) INFO: 25epoch:train:8501-8600batch: iter_time=1.084e-04, forward_time=0.146, loss_ctc=72.864, loss_att=55.261, acc=0.709, loss=60.542, backward_time=1.029, grad_norm=110.946, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.239e-05, train_time=2.728
+[gpub005:0/64] 2023-07-08 09:48:41,215 (trainer:732) INFO: 25epoch:train:8601-8700batch: iter_time=1.034e-04, forward_time=0.145, loss_ctc=74.393, loss_att=55.528, acc=0.705, loss=61.188, backward_time=1.028, grad_norm=123.026, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.237e-05, train_time=2.718
+[gpub005:0/64] 2023-07-08 09:50:57,064 (trainer:732) INFO: 25epoch:train:8701-8800batch: iter_time=1.017e-04, forward_time=0.145, loss_ctc=68.057, loss_att=52.431, acc=0.694, loss=57.119, backward_time=1.028, grad_norm=108.479, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.236e-05, train_time=2.717
+[gpub005:0/64] 2023-07-08 09:53:13,155 (trainer:732) INFO: 25epoch:train:8801-8900batch: iter_time=1.123e-04, forward_time=0.147, loss_ctc=69.612, loss_att=54.933, acc=0.693, loss=59.336, backward_time=1.030, grad_norm=97.704, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.234e-05, train_time=2.722
+[gpub005:0/64] 2023-07-08 09:55:29,117 (trainer:732) INFO: 25epoch:train:8901-9000batch: iter_time=1.064e-04, forward_time=0.146, loss_ctc=74.272, loss_att=56.728, acc=0.717, loss=61.992, backward_time=1.029, grad_norm=139.457, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.233e-05, train_time=2.719
+[gpub005:0/64] 2023-07-08 09:57:44,878 (trainer:732) INFO: 25epoch:train:9001-9100batch: iter_time=1.241e-04, forward_time=0.148, loss_ctc=69.803, loss_att=53.448, acc=0.703, loss=58.354, backward_time=1.027, grad_norm=100.770, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.231e-05, train_time=2.715
+[gpub005:0/64] 2023-07-08 09:59:28,828 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub005:0/64] 2023-07-08 09:59:46,947 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 09:59:50,459 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 09:59:50,460 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub005:0/64] 2023-07-08 09:59:50,466 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 10:04:57,427 (trainer:732) INFO: 25epoch:train:9101-9200batch: iter_time=2.123, forward_time=0.149, loss_ctc=61.440, loss_att=46.047, acc=0.707, loss=50.665, backward_time=1.039, grad_norm=93.648, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.230e-05, train_time=8.651
+[gpub005:0/64] 2023-07-08 10:07:16,844 (trainer:732) INFO: 25epoch:train:9201-9300batch: iter_time=1.315e-04, forward_time=0.147, loss_ctc=70.907, loss_att=60.317, acc=0.693, loss=63.494, backward_time=1.032, grad_norm=107.964, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.228e-05, train_time=2.788
+[gpub005:0/64] 2023-07-08 10:10:27,961 (trainer:732) INFO: 25epoch:train:9301-9400batch: iter_time=1.220e-04, forward_time=0.148, loss_ctc=76.024, loss_att=54.562, acc=0.702, loss=61.001, backward_time=1.074, grad_norm=98.849, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.227e-05, train_time=3.822
+[gpub005:0/64] 2023-07-08 10:13:29,821 (trainer:732) INFO: 25epoch:train:9401-9500batch: iter_time=1.006e-04, forward_time=0.145, loss_ctc=74.449, loss_att=55.408, acc=0.705, loss=61.120, backward_time=1.077, grad_norm=113.210, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.225e-05, train_time=3.637
+[gpub005:0/64] 2023-07-08 10:16:27,233 (trainer:732) INFO: 25epoch:train:9501-9600batch: iter_time=1.013e-04, forward_time=0.144, loss_ctc=69.774, loss_att=53.169, acc=0.695, loss=58.150, backward_time=1.088, grad_norm=100.371, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.224e-05, train_time=3.548
+[gpub005:0/64] 2023-07-08 10:19:25,557 (trainer:732) INFO: 25epoch:train:9601-9700batch: iter_time=9.534e-05, forward_time=0.146, loss_ctc=67.052, loss_att=52.342, acc=0.696, loss=56.755, backward_time=1.091, grad_norm=115.351, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.222e-05, train_time=3.566
+[gpub005:0/64] 2023-07-08 10:22:24,138 (trainer:732) INFO: 25epoch:train:9701-9800batch: iter_time=9.886e-05, forward_time=0.146, loss_ctc=75.203, loss_att=60.399, acc=0.707, loss=64.840, backward_time=1.082, grad_norm=99.748, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.221e-05, train_time=3.571
+[gpub005:0/64] 2023-07-08 10:25:09,252 (trainer:732) INFO: 25epoch:train:9801-9900batch: iter_time=1.005e-04, forward_time=0.146, loss_ctc=73.722, loss_att=56.459, acc=0.708, loss=61.638, backward_time=1.067, grad_norm=108.586, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.219e-05, train_time=3.302
+[gpub005:0/64] 2023-07-08 10:27:55,647 (trainer:732) INFO: 25epoch:train:9901-10000batch: iter_time=9.428e-05, forward_time=0.145, loss_ctc=65.177, loss_att=47.351, acc=0.716, loss=52.698, backward_time=1.069, grad_norm=95.321, clip=100.000, loss_scale=3.778e+22, optim_step_time=0.183, optim0_lr0=7.218e-05, train_time=3.328
+[gpub005:0/64] 2023-07-08 10:42:07,209 (trainer:338) INFO: 25epoch results: [train] iter_time=0.176, forward_time=0.148, loss_ctc=71.983, loss_att=55.204, acc=0.695, loss=60.238, backward_time=1.035, grad_norm=104.818, clip=100.000, loss_scale=2.456e+22, optim_step_time=0.183, optim0_lr0=7.294e-05, train_time=3.336, time=4 hours, 38 minutes and 25.25 seconds, total_count=220000, gpu_max_cached_mem_GB=38.234, [valid] loss_ctc=47.009, cer_ctc=0.273, loss_att=42.530, acc=0.652, cer=0.442, wer=1.000, loss=43.874, time=7 minutes and 56.01 seconds, total_count=22770, gpu_max_cached_mem_GB=38.234, [att_plot] time=5 minutes and 52.76 seconds, total_count=0, gpu_max_cached_mem_GB=38.234
+[gpub005:0/64] 2023-07-08 10:42:26,240 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub005:0/64] 2023-07-08 10:42:26,246 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/20epoch.pth
+[gpub005:0/64] 2023-07-08 10:42:26,311 (trainer:272) INFO: 26/30epoch started. Estimated time to finish: 1 day, 15 minutes and 1.2 seconds
+[gpub005:0/64] 2023-07-08 10:42:27,702 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub005:0/64] 2023-07-08 10:42:46,699 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 10:42:50,222 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 10:42:50,222 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub005:0/64] 2023-07-08 10:42:50,310 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 10:53:23,692 (trainer:732) INFO: 26epoch:train:1-100batch: iter_time=5.145, forward_time=0.174, loss_ctc=69.387, loss_att=53.258, acc=0.713, loss=58.097, backward_time=1.043, grad_norm=87.743, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.186, optim0_lr0=7.216e-05, train_time=13.133
+[gpub005:0/64] 2023-07-08 10:55:39,564 (trainer:732) INFO: 26epoch:train:101-200batch: iter_time=1.167e-04, forward_time=0.146, loss_ctc=69.584, loss_att=53.691, acc=0.707, loss=58.459, backward_time=1.030, grad_norm=113.614, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.215e-05, train_time=2.717
+[gpub005:0/64] 2023-07-08 10:57:57,029 (trainer:732) INFO: 26epoch:train:201-300batch: iter_time=1.211e-04, forward_time=0.147, loss_ctc=76.842, loss_att=60.594, acc=0.691, loss=65.468, backward_time=1.031, grad_norm=97.403, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.213e-05, train_time=2.749
+[gpub005:0/64] 2023-07-08 11:00:13,880 (trainer:732) INFO: 26epoch:train:301-400batch: iter_time=1.292e-04, forward_time=0.146, loss_ctc=75.295, loss_att=55.762, acc=0.698, loss=61.622, backward_time=1.028, grad_norm=130.883, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.212e-05, train_time=2.737
+[gpub005:0/64] 2023-07-08 11:02:31,540 (trainer:732) INFO: 26epoch:train:401-500batch: iter_time=1.269e-04, forward_time=0.145, loss_ctc=71.385, loss_att=52.705, acc=0.691, loss=58.309, backward_time=1.030, grad_norm=118.277, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.210e-05, train_time=2.753
+[gpub005:0/64] 2023-07-08 11:04:48,000 (trainer:732) INFO: 26epoch:train:501-600batch: iter_time=1.212e-04, forward_time=0.146, loss_ctc=86.870, loss_att=64.836, acc=0.690, loss=71.446, backward_time=1.030, grad_norm=142.009, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.209e-05, train_time=2.729
+[gpub005:0/64] 2023-07-08 11:07:14,447 (trainer:732) INFO: 26epoch:train:601-700batch: iter_time=1.225e-04, forward_time=0.146, loss_ctc=75.534, loss_att=59.806, acc=0.685, loss=64.524, backward_time=1.037, grad_norm=135.306, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.207e-05, train_time=2.929
+[gpub005:0/64] 2023-07-08 11:09:37,366 (trainer:732) INFO: 26epoch:train:701-800batch: iter_time=1.261e-04, forward_time=0.145, loss_ctc=75.179, loss_att=56.787, acc=0.705, loss=62.304, backward_time=1.047, grad_norm=102.383, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.206e-05, train_time=2.858
+[gpub005:0/64] 2023-07-08 11:10:31,607 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpub005:0/64] 2023-07-08 11:10:49,546 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 11:10:52,871 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 11:10:52,871 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpub005:0/64] 2023-07-08 11:10:52,877 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 11:14:37,121 (trainer:732) INFO: 26epoch:train:801-900batch: iter_time=1.279, forward_time=0.180, loss_ctc=66.178, loss_att=53.712, acc=0.706, loss=57.452, backward_time=1.040, grad_norm=98.261, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.204e-05, train_time=5.995
+[gpub005:0/64] 2023-07-08 11:16:53,461 (trainer:732) INFO: 26epoch:train:901-1000batch: iter_time=1.033e-04, forward_time=0.146, loss_ctc=69.626, loss_att=51.851, acc=0.699, loss=57.184, backward_time=1.025, grad_norm=93.025, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.203e-05, train_time=2.727
+[gpub005:0/64] 2023-07-08 11:19:09,392 (trainer:732) INFO: 26epoch:train:1001-1100batch: iter_time=1.097e-04, forward_time=0.146, loss_ctc=72.606, loss_att=58.385, acc=0.692, loss=62.651, backward_time=1.028, grad_norm=94.394, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.201e-05, train_time=2.718
+[gpub005:0/64] 2023-07-08 11:21:25,206 (trainer:732) INFO: 26epoch:train:1101-1200batch: iter_time=1.058e-04, forward_time=0.145, loss_ctc=75.555, loss_att=58.013, acc=0.691, loss=63.275, backward_time=1.026, grad_norm=102.726, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.200e-05, train_time=2.716
+[gpub005:0/64] 2023-07-08 11:23:40,877 (trainer:732) INFO: 26epoch:train:1201-1300batch: iter_time=1.002e-04, forward_time=0.146, loss_ctc=74.959, loss_att=55.140, acc=0.697, loss=61.086, backward_time=1.028, grad_norm=94.634, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.198e-05, train_time=2.713
+[gpub005:0/64] 2023-07-08 11:25:56,605 (trainer:732) INFO: 26epoch:train:1301-1400batch: iter_time=1.218e-04, forward_time=0.146, loss_ctc=78.132, loss_att=58.606, acc=0.681, loss=64.464, backward_time=1.028, grad_norm=128.909, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.197e-05, train_time=2.714
+[gpub005:0/64] 2023-07-08 11:28:12,321 (trainer:732) INFO: 26epoch:train:1401-1500batch: iter_time=1.088e-04, forward_time=0.146, loss_ctc=80.030, loss_att=66.092, acc=0.666, loss=70.273, backward_time=1.027, grad_norm=120.822, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.195e-05, train_time=2.714
+[gpub005:0/64] 2023-07-08 11:30:27,679 (trainer:732) INFO: 26epoch:train:1501-1600batch: iter_time=1.163e-04, forward_time=0.145, loss_ctc=72.335, loss_att=51.339, acc=0.698, loss=57.638, backward_time=1.027, grad_norm=90.172, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.194e-05, train_time=2.707
+[gpub005:0/64] 2023-07-08 11:32:05,236 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub005:0/64] 2023-07-08 11:32:23,714 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 11:32:27,250 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 11:32:27,250 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub005:0/64] 2023-07-08 11:32:27,257 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 11:35:43,326 (trainer:732) INFO: 26epoch:train:1601-1700batch: iter_time=1.695, forward_time=0.145, loss_ctc=67.914, loss_att=52.867, acc=0.699, loss=57.381, backward_time=1.039, grad_norm=90.508, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.192e-05, train_time=6.313
+[gpub005:0/64] 2023-07-08 11:37:59,366 (trainer:732) INFO: 26epoch:train:1701-1800batch: iter_time=1.022e-04, forward_time=0.145, loss_ctc=69.223, loss_att=53.087, acc=0.697, loss=57.928, backward_time=1.029, grad_norm=89.506, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.191e-05, train_time=2.721
+[gpub005:0/64] 2023-07-08 11:40:15,245 (trainer:732) INFO: 26epoch:train:1801-1900batch: iter_time=9.824e-05, forward_time=0.145, loss_ctc=72.519, loss_att=57.553, acc=0.693, loss=62.043, backward_time=1.029, grad_norm=92.250, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.189e-05, train_time=2.717
+[gpub005:0/64] 2023-07-08 11:42:31,327 (trainer:732) INFO: 26epoch:train:1901-2000batch: iter_time=9.918e-05, forward_time=0.146, loss_ctc=76.626, loss_att=56.219, acc=0.694, loss=62.341, backward_time=1.028, grad_norm=104.830, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.188e-05, train_time=2.721
+[gpub005:0/64] 2023-07-08 11:44:46,889 (trainer:732) INFO: 26epoch:train:2001-2100batch: iter_time=1.021e-04, forward_time=0.145, loss_ctc=76.462, loss_att=56.079, acc=0.699, loss=62.194, backward_time=1.026, grad_norm=100.652, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.186e-05, train_time=2.711
+[gpub005:0/64] 2023-07-08 11:47:02,624 (trainer:732) INFO: 26epoch:train:2101-2200batch: iter_time=1.040e-04, forward_time=0.146, loss_ctc=74.787, loss_att=56.538, acc=0.684, loss=62.013, backward_time=1.027, grad_norm=120.468, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.185e-05, train_time=2.714
+[gpub005:0/64] 2023-07-08 11:49:18,264 (trainer:732) INFO: 26epoch:train:2201-2300batch: iter_time=1.109e-04, forward_time=0.145, loss_ctc=84.338, loss_att=68.512, acc=0.663, loss=73.260, backward_time=1.027, grad_norm=102.630, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.183e-05, train_time=2.713
+[gpub005:0/64] 2023-07-08 11:51:33,885 (trainer:732) INFO: 26epoch:train:2301-2400batch: iter_time=1.156e-04, forward_time=0.146, loss_ctc=70.380, loss_att=50.909, acc=0.699, loss=56.750, backward_time=1.027, grad_norm=96.571, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.182e-05, train_time=2.712
+[gpub005:0/64] 2023-07-08 11:53:49,838 (trainer:732) INFO: 26epoch:train:2401-2500batch: iter_time=1.182e-04, forward_time=0.148, loss_ctc=69.633, loss_att=54.823, acc=0.700, loss=59.266, backward_time=1.028, grad_norm=116.785, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.180e-05, train_time=2.719
+[gpub005:0/64] 2023-07-08 11:53:51,202 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub005:0/64] 2023-07-08 11:54:09,386 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 11:54:12,870 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 11:54:12,870 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub005:0/64] 2023-07-08 11:54:12,876 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 11:59:26,483 (trainer:732) INFO: 26epoch:train:2501-2600batch: iter_time=1.206, forward_time=0.146, loss_ctc=72.363, loss_att=51.578, acc=0.716, loss=57.813, backward_time=1.040, grad_norm=117.065, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.179e-05, train_time=6.733
+[gpub005:0/64] 2023-07-08 12:01:42,517 (trainer:732) INFO: 26epoch:train:2601-2700batch: iter_time=1.157e-04, forward_time=0.146, loss_ctc=68.902, loss_att=59.039, acc=0.707, loss=61.998, backward_time=1.028, grad_norm=96.352, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.177e-05, train_time=2.720
+[gpub005:0/64] 2023-07-08 12:03:58,444 (trainer:732) INFO: 26epoch:train:2701-2800batch: iter_time=1.304e-04, forward_time=0.147, loss_ctc=79.158, loss_att=56.178, acc=0.715, loss=63.072, backward_time=1.028, grad_norm=99.364, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.176e-05, train_time=2.718
+[gpub005:0/64] 2023-07-08 12:06:14,072 (trainer:732) INFO: 26epoch:train:2801-2900batch: iter_time=1.259e-04, forward_time=0.146, loss_ctc=69.645, loss_att=52.512, acc=0.705, loss=57.652, backward_time=1.027, grad_norm=91.072, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.174e-05, train_time=2.712
+[gpub005:0/64] 2023-07-08 12:08:29,717 (trainer:732) INFO: 26epoch:train:2901-3000batch: iter_time=1.128e-04, forward_time=0.146, loss_ctc=71.834, loss_att=56.794, acc=0.687, loss=61.306, backward_time=1.027, grad_norm=93.266, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.173e-05, train_time=2.713
+[gpub005:0/64] 2023-07-08 12:10:45,477 (trainer:732) INFO: 26epoch:train:3001-3100batch: iter_time=1.273e-04, forward_time=0.146, loss_ctc=81.950, loss_att=63.128, acc=0.688, loss=68.774, backward_time=1.029, grad_norm=123.293, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.171e-05, train_time=2.715
+[gpub005:0/64] 2023-07-08 12:13:01,110 (trainer:732) INFO: 26epoch:train:3101-3200batch: iter_time=1.116e-04, forward_time=0.146, loss_ctc=72.267, loss_att=57.245, acc=0.702, loss=61.751, backward_time=1.027, grad_norm=98.018, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.170e-05, train_time=2.712
+[gpub005:0/64] 2023-07-08 12:15:16,678 (trainer:732) INFO: 26epoch:train:3201-3300batch: iter_time=1.153e-04, forward_time=0.146, loss_ctc=69.021, loss_att=54.141, acc=0.701, loss=58.605, backward_time=1.027, grad_norm=97.100, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.168e-05, train_time=2.711
+[gpub005:0/64] 2023-07-08 12:16:04,551 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub005:0/64] 2023-07-08 12:16:23,248 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 12:16:26,719 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 12:16:26,719 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpub005:0/64] 2023-07-08 12:16:26,725 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 12:20:37,309 (trainer:732) INFO: 26epoch:train:3301-3400batch: iter_time=1.265, forward_time=0.147, loss_ctc=73.247, loss_att=53.851, acc=0.718, loss=59.669, backward_time=1.041, grad_norm=101.473, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.167e-05, train_time=6.412
+[gpub005:0/64] 2023-07-08 12:22:53,523 (trainer:732) INFO: 26epoch:train:3401-3500batch: iter_time=1.317e-04, forward_time=0.146, loss_ctc=67.909, loss_att=50.412, acc=0.701, loss=55.661, backward_time=1.026, grad_norm=114.248, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.166e-05, train_time=2.724
+[gpub005:0/64] 2023-07-08 12:25:12,032 (trainer:732) INFO: 26epoch:train:3501-3600batch: iter_time=1.239e-04, forward_time=0.147, loss_ctc=72.018, loss_att=57.627, acc=0.696, loss=61.944, backward_time=1.031, grad_norm=94.403, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.164e-05, train_time=2.770
+[gpub005:0/64] 2023-07-08 12:27:27,429 (trainer:732) INFO: 26epoch:train:3601-3700batch: iter_time=1.369e-04, forward_time=0.145, loss_ctc=76.212, loss_att=57.567, acc=0.692, loss=63.161, backward_time=1.026, grad_norm=110.612, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.163e-05, train_time=2.708
+[gpub005:0/64] 2023-07-08 12:29:43,097 (trainer:732) INFO: 26epoch:train:3701-3800batch: iter_time=1.296e-04, forward_time=0.146, loss_ctc=73.717, loss_att=54.219, acc=0.698, loss=60.068, backward_time=1.027, grad_norm=105.595, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.161e-05, train_time=2.713
+[gpub005:0/64] 2023-07-08 12:32:01,979 (trainer:732) INFO: 26epoch:train:3801-3900batch: iter_time=1.318e-04, forward_time=0.148, loss_ctc=75.248, loss_att=55.956, acc=0.692, loss=61.744, backward_time=1.028, grad_norm=155.550, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.160e-05, train_time=2.777
+[gpub005:0/64] 2023-07-08 12:34:19,829 (trainer:732) INFO: 26epoch:train:3901-4000batch: iter_time=1.284e-04, forward_time=0.147, loss_ctc=81.079, loss_att=65.252, acc=0.669, loss=70.000, backward_time=1.027, grad_norm=104.122, clip=100.000, loss_scale=7.556e+22, optim_step_time=0.183, optim0_lr0=7.158e-05, train_time=2.757
+[gpub005:0/64] 2023-07-08 12:36:41,064 (trainer:732) INFO: 26epoch:train:4001-4100batch: iter_time=1.320e-04, forward_time=0.146, loss_ctc=71.146, loss_att=50.789, acc=0.703, loss=56.897, backward_time=1.035, grad_norm=105.814, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.157e-05, train_time=2.824
+[gpub005:0/64] 2023-07-08 12:38:22,077 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub005:0/64] 2023-07-08 12:38:40,154 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 12:38:43,616 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 12:38:43,616 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub005:0/64] 2023-07-08 12:38:43,622 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 12:44:13,729 (trainer:732) INFO: 26epoch:train:4101-4200batch: iter_time=1.254, forward_time=0.146, loss_ctc=65.207, loss_att=53.418, acc=0.701, loss=56.954, backward_time=1.057, grad_norm=94.878, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.155e-05, train_time=9.053
+[gpub005:0/64] 2023-07-08 12:46:31,564 (trainer:732) INFO: 26epoch:train:4201-4300batch: iter_time=1.181e-04, forward_time=0.147, loss_ctc=69.637, loss_att=51.000, acc=0.720, loss=56.591, backward_time=1.028, grad_norm=91.646, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.154e-05, train_time=2.756
+[gpub005:0/64] 2023-07-08 12:48:47,826 (trainer:732) INFO: 26epoch:train:4301-4400batch: iter_time=1.210e-04, forward_time=0.147, loss_ctc=69.877, loss_att=58.670, acc=0.710, loss=62.032, backward_time=1.031, grad_norm=90.287, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.152e-05, train_time=2.725
+[gpub005:0/64] 2023-07-08 12:51:03,869 (trainer:732) INFO: 26epoch:train:4401-4500batch: iter_time=1.249e-04, forward_time=0.146, loss_ctc=78.707, loss_att=56.151, acc=0.712, loss=62.918, backward_time=1.028, grad_norm=100.064, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.151e-05, train_time=2.721
+[gpub005:0/64] 2023-07-08 12:53:19,561 (trainer:732) INFO: 26epoch:train:4501-4600batch: iter_time=1.218e-04, forward_time=0.146, loss_ctc=72.489, loss_att=53.880, acc=0.706, loss=59.463, backward_time=1.028, grad_norm=96.597, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.149e-05, train_time=2.714
+[gpub005:0/64] 2023-07-08 12:55:35,638 (trainer:732) INFO: 26epoch:train:4601-4700batch: iter_time=1.239e-04, forward_time=0.147, loss_ctc=69.209, loss_att=55.959, acc=0.692, loss=59.934, backward_time=1.030, grad_norm=104.066, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.148e-05, train_time=2.721
+[gpub005:0/64] 2023-07-08 12:57:51,477 (trainer:732) INFO: 26epoch:train:4701-4800batch: iter_time=1.274e-04, forward_time=0.147, loss_ctc=79.306, loss_att=59.281, acc=0.689, loss=65.288, backward_time=1.028, grad_norm=107.090, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.146e-05, train_time=2.717
+[gpub005:0/64] 2023-07-08 13:00:07,474 (trainer:732) INFO: 26epoch:train:4801-4900batch: iter_time=1.384e-04, forward_time=0.147, loss_ctc=73.038, loss_att=57.206, acc=0.700, loss=61.956, backward_time=1.030, grad_norm=99.761, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.145e-05, train_time=2.720
+[gpub005:0/64] 2023-07-08 13:02:23,155 (trainer:732) INFO: 26epoch:train:4901-5000batch: iter_time=1.248e-04, forward_time=0.146, loss_ctc=69.379, loss_att=53.802, acc=0.705, loss=58.475, backward_time=1.028, grad_norm=104.017, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.144e-05, train_time=2.713
+[gpub005:0/64] 2023-07-08 13:02:24,540 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub005:0/64] 2023-07-08 13:02:42,839 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 13:02:46,436 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 13:02:46,436 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpub005:0/64] 2023-07-08 13:02:46,442 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 13:07:28,194 (trainer:732) INFO: 26epoch:train:5001-5100batch: iter_time=1.260, forward_time=0.147, loss_ctc=73.292, loss_att=50.325, acc=0.720, loss=57.215, backward_time=1.044, grad_norm=94.971, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.142e-05, train_time=6.101
+[gpub005:0/64] 2023-07-08 13:09:44,725 (trainer:732) INFO: 26epoch:train:5101-5200batch: iter_time=1.017e-04, forward_time=0.146, loss_ctc=69.927, loss_att=58.461, acc=0.708, loss=61.901, backward_time=1.030, grad_norm=103.765, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.141e-05, train_time=2.730
+[gpub005:0/64] 2023-07-08 13:12:00,343 (trainer:732) INFO: 26epoch:train:5201-5300batch: iter_time=9.994e-05, forward_time=0.145, loss_ctc=80.359, loss_att=56.264, acc=0.712, loss=63.492, backward_time=1.026, grad_norm=116.586, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.139e-05, train_time=2.712
+[gpub005:0/64] 2023-07-08 13:14:16,318 (trainer:732) INFO: 26epoch:train:5301-5400batch: iter_time=1.142e-04, forward_time=0.147, loss_ctc=69.708, loss_att=52.973, acc=0.705, loss=57.994, backward_time=1.028, grad_norm=105.492, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.138e-05, train_time=2.719
+[gpub005:0/64] 2023-07-08 13:16:31,923 (trainer:732) INFO: 26epoch:train:5401-5500batch: iter_time=1.216e-04, forward_time=0.148, loss_ctc=70.245, loss_att=55.219, acc=0.686, loss=59.727, backward_time=1.026, grad_norm=110.833, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.136e-05, train_time=2.712
+[gpub005:0/64] 2023-07-08 13:18:47,558 (trainer:732) INFO: 26epoch:train:5501-5600batch: iter_time=1.173e-04, forward_time=0.148, loss_ctc=80.307, loss_att=60.895, acc=0.691, loss=66.719, backward_time=1.026, grad_norm=99.104, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.135e-05, train_time=2.712
+[gpub005:0/64] 2023-07-08 13:21:03,349 (trainer:732) INFO: 26epoch:train:5601-5700batch: iter_time=1.161e-04, forward_time=0.148, loss_ctc=70.927, loss_att=55.692, acc=0.706, loss=60.262, backward_time=1.027, grad_norm=104.300, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.133e-05, train_time=2.716
+[gpub005:0/64] 2023-07-08 13:23:19,161 (trainer:732) INFO: 26epoch:train:5701-5800batch: iter_time=1.018e-04, forward_time=0.146, loss_ctc=67.813, loss_att=52.632, acc=0.705, loss=57.186, backward_time=1.026, grad_norm=101.610, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.132e-05, train_time=2.716
+[gpub005:0/64] 2023-07-08 13:24:06,674 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub005:0/64] 2023-07-08 13:24:24,853 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 13:24:28,314 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 13:24:28,315 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub005:0/64] 2023-07-08 13:24:28,321 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 13:29:09,299 (trainer:732) INFO: 26epoch:train:5801-5900batch: iter_time=1.271, forward_time=0.172, loss_ctc=73.165, loss_att=53.526, acc=0.722, loss=59.418, backward_time=1.037, grad_norm=95.823, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.185, optim0_lr0=7.130e-05, train_time=7.002
+[gpub005:0/64] 2023-07-08 13:31:28,366 (trainer:732) INFO: 26epoch:train:5901-6000batch: iter_time=1.015e-04, forward_time=0.147, loss_ctc=67.679, loss_att=48.515, acc=0.726, loss=54.264, backward_time=1.030, grad_norm=99.757, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.129e-05, train_time=2.782
+[gpub005:0/64] 2023-07-08 13:33:57,056 (trainer:732) INFO: 26epoch:train:6001-6100batch: iter_time=1.045e-04, forward_time=0.146, loss_ctc=72.320, loss_att=57.931, acc=0.702, loss=62.248, backward_time=1.035, grad_norm=96.806, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.128e-05, train_time=2.974
+[gpub005:0/64] 2023-07-08 13:36:18,221 (trainer:732) INFO: 26epoch:train:6101-6200batch: iter_time=1.158e-04, forward_time=0.146, loss_ctc=74.820, loss_att=56.437, acc=0.706, loss=61.952, backward_time=1.041, grad_norm=104.704, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.126e-05, train_time=2.823
+[gpub005:0/64] 2023-07-08 13:38:37,328 (trainer:732) INFO: 26epoch:train:6201-6300batch: iter_time=1.170e-04, forward_time=0.146, loss_ctc=74.027, loss_att=54.430, acc=0.705, loss=60.309, backward_time=1.037, grad_norm=95.467, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.125e-05, train_time=2.782
+[gpub005:0/64] 2023-07-08 13:40:53,195 (trainer:732) INFO: 26epoch:train:6301-6400batch: iter_time=1.223e-04, forward_time=0.147, loss_ctc=74.703, loss_att=56.062, acc=0.699, loss=61.654, backward_time=1.027, grad_norm=102.603, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.123e-05, train_time=2.717
+[gpub005:0/64] 2023-07-08 13:43:09,362 (trainer:732) INFO: 26epoch:train:6401-6500batch: iter_time=1.088e-04, forward_time=0.147, loss_ctc=78.713, loss_att=64.969, acc=0.686, loss=69.093, backward_time=1.028, grad_norm=104.530, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.122e-05, train_time=2.723
+[gpub005:0/64] 2023-07-08 13:45:28,847 (trainer:732) INFO: 26epoch:train:6501-6600batch: iter_time=1.245e-04, forward_time=0.147, loss_ctc=69.965, loss_att=51.127, acc=0.717, loss=56.778, backward_time=1.043, grad_norm=82.428, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.120e-05, train_time=2.789
+[gpub005:0/64] 2023-07-08 13:47:05,544 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub005:0/64] 2023-07-08 13:47:23,796 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 13:47:27,253 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 13:47:27,253 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub005:0/64] 2023-07-08 13:47:27,260 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 13:50:28,307 (trainer:732) INFO: 26epoch:train:6601-6700batch: iter_time=1.259, forward_time=0.147, loss_ctc=64.311, loss_att=52.844, acc=0.702, loss=56.284, backward_time=1.044, grad_norm=99.413, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.119e-05, train_time=5.989
+[gpub005:0/64] 2023-07-08 13:52:44,647 (trainer:732) INFO: 26epoch:train:6701-6800batch: iter_time=1.222e-04, forward_time=0.146, loss_ctc=69.999, loss_att=51.589, acc=0.707, loss=57.112, backward_time=1.028, grad_norm=108.206, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.117e-05, train_time=2.727
+[gpub005:0/64] 2023-07-08 13:55:00,535 (trainer:732) INFO: 26epoch:train:6801-6900batch: iter_time=1.248e-04, forward_time=0.147, loss_ctc=68.683, loss_att=58.666, acc=0.702, loss=61.671, backward_time=1.027, grad_norm=95.657, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.116e-05, train_time=2.718
+[gpub005:0/64] 2023-07-08 13:57:16,106 (trainer:732) INFO: 26epoch:train:6901-7000batch: iter_time=1.367e-04, forward_time=0.146, loss_ctc=79.555, loss_att=57.273, acc=0.699, loss=63.958, backward_time=1.027, grad_norm=106.510, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.115e-05, train_time=2.711
+[gpub005:0/64] 2023-07-08 13:59:31,764 (trainer:732) INFO: 26epoch:train:7001-7100batch: iter_time=1.325e-04, forward_time=0.147, loss_ctc=71.814, loss_att=53.324, acc=0.706, loss=58.871, backward_time=1.027, grad_norm=103.081, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.113e-05, train_time=2.713
+[gpub005:0/64] 2023-07-08 14:01:47,399 (trainer:732) INFO: 26epoch:train:7101-7200batch: iter_time=1.215e-04, forward_time=0.146, loss_ctc=69.657, loss_att=56.274, acc=0.687, loss=60.289, backward_time=1.027, grad_norm=96.955, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.112e-05, train_time=2.712
+[gpub005:0/64] 2023-07-08 14:04:03,309 (trainer:732) INFO: 26epoch:train:7201-7300batch: iter_time=1.258e-04, forward_time=0.147, loss_ctc=77.898, loss_att=59.498, acc=0.680, loss=65.018, backward_time=1.029, grad_norm=113.679, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.110e-05, train_time=2.718
+[gpub005:0/64] 2023-07-08 14:06:19,024 (trainer:732) INFO: 26epoch:train:7301-7400batch: iter_time=1.125e-04, forward_time=0.146, loss_ctc=72.524, loss_att=56.616, acc=0.693, loss=61.388, backward_time=1.028, grad_norm=93.674, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.109e-05, train_time=2.714
+[gpub005:0/64] 2023-07-08 14:08:34,660 (trainer:732) INFO: 26epoch:train:7401-7500batch: iter_time=1.323e-04, forward_time=0.146, loss_ctc=69.537, loss_att=53.600, acc=0.701, loss=58.382, backward_time=1.027, grad_norm=106.828, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.107e-05, train_time=2.712
+[gpub005:0/64] 2023-07-08 14:08:35,961 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub005:0/64] 2023-07-08 14:08:54,239 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 14:08:57,934 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 14:08:57,934 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub005:0/64] 2023-07-08 14:08:57,940 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 14:15:22,244 (trainer:732) INFO: 26epoch:train:7501-7600batch: iter_time=1.758, forward_time=0.148, loss_ctc=68.459, loss_att=51.191, acc=0.727, loss=56.372, backward_time=1.038, grad_norm=87.229, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.106e-05, train_time=8.151
+[gpub005:0/64] 2023-07-08 14:17:40,957 (trainer:732) INFO: 26epoch:train:7601-7700batch: iter_time=1.404e-04, forward_time=0.147, loss_ctc=68.155, loss_att=52.137, acc=0.719, loss=56.943, backward_time=1.029, grad_norm=151.653, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.104e-05, train_time=2.774
+[gpub005:0/64] 2023-07-08 14:19:57,108 (trainer:732) INFO: 26epoch:train:7701-7800batch: iter_time=1.239e-04, forward_time=0.147, loss_ctc=75.907, loss_att=59.882, acc=0.701, loss=64.690, backward_time=1.028, grad_norm=99.037, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.103e-05, train_time=2.723
+[gpub005:0/64] 2023-07-08 14:22:12,912 (trainer:732) INFO: 26epoch:train:7801-7900batch: iter_time=1.356e-04, forward_time=0.147, loss_ctc=70.855, loss_att=54.076, acc=0.710, loss=59.110, backward_time=1.028, grad_norm=103.081, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.102e-05, train_time=2.716
+[gpub005:0/64] 2023-07-08 14:24:30,607 (trainer:732) INFO: 26epoch:train:7901-8000batch: iter_time=1.361e-04, forward_time=0.148, loss_ctc=68.206, loss_att=51.104, acc=0.702, loss=56.235, backward_time=1.031, grad_norm=99.810, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.100e-05, train_time=2.754
+[gpub005:0/64] 2023-07-08 14:26:47,209 (trainer:732) INFO: 26epoch:train:8001-8100batch: iter_time=1.410e-04, forward_time=0.147, loss_ctc=82.712, loss_att=63.271, acc=0.695, loss=69.104, backward_time=1.026, grad_norm=111.254, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.099e-05, train_time=2.732
+[gpub005:0/64] 2023-07-08 14:29:11,879 (trainer:732) INFO: 26epoch:train:8101-8200batch: iter_time=1.378e-04, forward_time=0.147, loss_ctc=73.581, loss_att=58.566, acc=0.694, loss=63.071, backward_time=1.037, grad_norm=97.879, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.097e-05, train_time=2.893
+[gpub005:0/64] 2023-07-08 14:31:27,869 (trainer:732) INFO: 26epoch:train:8201-8300batch: iter_time=1.301e-04, forward_time=0.147, loss_ctc=73.403, loss_att=55.466, acc=0.709, loss=60.847, backward_time=1.028, grad_norm=102.034, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.096e-05, train_time=2.720
+[gpub005:0/64] 2023-07-08 14:32:17,789 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub005:0/64] 2023-07-08 14:32:35,784 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 14:32:39,537 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 14:32:39,537 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpub005:0/64] 2023-07-08 14:32:39,543 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 14:38:06,476 (trainer:732) INFO: 26epoch:train:8301-8400batch: iter_time=1.307, forward_time=0.146, loss_ctc=68.148, loss_att=52.608, acc=0.711, loss=57.270, backward_time=1.039, grad_norm=125.284, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.094e-05, train_time=7.972
+[gpub005:0/64] 2023-07-08 14:40:23,154 (trainer:732) INFO: 26epoch:train:8401-8500batch: iter_time=1.212e-04, forward_time=0.147, loss_ctc=68.148, loss_att=52.855, acc=0.701, loss=57.443, backward_time=1.027, grad_norm=93.761, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.184, optim0_lr0=7.093e-05, train_time=2.733
+[gpub005:0/64] 2023-07-08 14:42:39,471 (trainer:732) INFO: 26epoch:train:8501-8600batch: iter_time=1.113e-04, forward_time=0.145, loss_ctc=75.352, loss_att=59.373, acc=0.694, loss=64.167, backward_time=1.026, grad_norm=104.938, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.092e-05, train_time=2.726
+[gpub005:0/64] 2023-07-08 14:44:55,183 (trainer:732) INFO: 26epoch:train:8601-8700batch: iter_time=1.259e-04, forward_time=0.146, loss_ctc=71.071, loss_att=53.446, acc=0.700, loss=58.734, backward_time=1.026, grad_norm=98.544, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.090e-05, train_time=2.714
+[gpub005:0/64] 2023-07-08 14:47:10,603 (trainer:732) INFO: 26epoch:train:8701-8800batch: iter_time=1.307e-04, forward_time=0.146, loss_ctc=73.000, loss_att=54.481, acc=0.691, loss=60.037, backward_time=1.026, grad_norm=114.647, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.089e-05, train_time=2.708
+[gpub005:0/64] 2023-07-08 14:49:26,242 (trainer:732) INFO: 26epoch:train:8801-8900batch: iter_time=1.301e-04, forward_time=0.146, loss_ctc=76.952, loss_att=59.376, acc=0.693, loss=64.649, backward_time=1.028, grad_norm=98.987, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.087e-05, train_time=2.713
+[gpub005:0/64] 2023-07-08 14:51:41,715 (trainer:732) INFO: 26epoch:train:8901-9000batch: iter_time=1.081e-04, forward_time=0.146, loss_ctc=78.083, loss_att=61.980, acc=0.678, loss=66.811, backward_time=1.026, grad_norm=108.739, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.086e-05, train_time=2.709
+[gpub005:0/64] 2023-07-08 14:53:57,229 (trainer:732) INFO: 26epoch:train:9001-9100batch: iter_time=1.248e-04, forward_time=0.146, loss_ctc=70.526, loss_att=52.858, acc=0.705, loss=58.158, backward_time=1.026, grad_norm=99.831, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.084e-05, train_time=2.710
+[gpub005:0/64] 2023-07-08 14:55:29,936 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub005:0/64] 2023-07-08 14:55:48,203 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 14:55:51,676 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 14:55:51,676 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub005:0/64] 2023-07-08 14:55:51,682 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 14:59:11,906 (trainer:732) INFO: 26epoch:train:9101-9200batch: iter_time=1.673, forward_time=0.148, loss_ctc=69.871, loss_att=52.356, acc=0.707, loss=57.611, backward_time=1.042, grad_norm=126.560, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.083e-05, train_time=6.293
+[gpub005:0/64] 2023-07-08 15:01:28,565 (trainer:732) INFO: 26epoch:train:9201-9300batch: iter_time=1.172e-04, forward_time=0.146, loss_ctc=71.189, loss_att=51.918, acc=0.707, loss=57.699, backward_time=1.029, grad_norm=123.852, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.082e-05, train_time=2.733
+[gpub005:0/64] 2023-07-08 15:03:47,418 (trainer:732) INFO: 26epoch:train:9301-9400batch: iter_time=1.201e-04, forward_time=0.148, loss_ctc=67.872, loss_att=55.829, acc=0.707, loss=59.442, backward_time=1.033, grad_norm=91.349, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.184, optim0_lr0=7.080e-05, train_time=2.777
+[gpub005:0/64] 2023-07-08 15:06:06,242 (trainer:732) INFO: 26epoch:train:9401-9500batch: iter_time=1.205e-04, forward_time=0.147, loss_ctc=78.903, loss_att=56.094, acc=0.705, loss=62.937, backward_time=1.030, grad_norm=96.669, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.079e-05, train_time=2.776
+[gpub005:0/64] 2023-07-08 15:08:22,234 (trainer:732) INFO: 26epoch:train:9501-9600batch: iter_time=1.098e-04, forward_time=0.147, loss_ctc=71.778, loss_att=52.472, acc=0.708, loss=58.264, backward_time=1.028, grad_norm=100.964, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.077e-05, train_time=2.720
+[gpub005:0/64] 2023-07-08 15:10:42,121 (trainer:732) INFO: 26epoch:train:9601-9700batch: iter_time=1.164e-04, forward_time=0.147, loss_ctc=69.666, loss_att=56.280, acc=0.686, loss=60.296, backward_time=1.041, grad_norm=97.150, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.076e-05, train_time=2.798
+[gpub005:0/64] 2023-07-08 15:13:00,926 (trainer:732) INFO: 26epoch:train:9701-9800batch: iter_time=1.106e-04, forward_time=0.147, loss_ctc=77.484, loss_att=59.398, acc=0.680, loss=64.824, backward_time=1.034, grad_norm=104.554, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.075e-05, train_time=2.776
+[gpub005:0/64] 2023-07-08 15:15:25,022 (trainer:732) INFO: 26epoch:train:9801-9900batch: iter_time=1.269e-04, forward_time=0.157, loss_ctc=69.791, loss_att=54.546, acc=0.702, loss=59.120, backward_time=1.041, grad_norm=91.940, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.186, optim0_lr0=7.073e-05, train_time=2.882
+[gpub005:0/64] 2023-07-08 15:17:40,647 (trainer:732) INFO: 26epoch:train:9901-10000batch: iter_time=1.157e-04, forward_time=0.146, loss_ctc=67.648, loss_att=52.440, acc=0.704, loss=57.002, backward_time=1.025, grad_norm=92.639, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.072e-05, train_time=2.712
+[gpub005:0/64] 2023-07-08 15:29:42,436 (trainer:338) INFO: 26epoch results: [train] iter_time=0.204, forward_time=0.147, loss_ctc=72.927, loss_att=55.820, acc=0.700, loss=60.952, backward_time=1.031, grad_norm=104.257, clip=100.000, loss_scale=1.511e+23, optim_step_time=0.183, optim0_lr0=7.143e-05, train_time=3.302, time=4 hours, 35 minutes and 23.69 seconds, total_count=230000, gpu_max_cached_mem_GB=38.234, [valid] loss_ctc=48.054, cer_ctc=0.276, loss_att=40.224, acc=0.655, cer=0.436, wer=0.999, loss=42.573, time=5 minutes and 54.42 seconds, total_count=23782, gpu_max_cached_mem_GB=38.234, [att_plot] time=5 minutes and 57.87 seconds, total_count=0, gpu_max_cached_mem_GB=38.234
+[gpub005:0/64] 2023-07-08 15:29:57,930 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub005:0/64] 2023-07-08 15:29:57,939 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/21epoch.pth
+[gpub005:0/64] 2023-07-08 15:29:57,980 (trainer:272) INFO: 27/30epoch started. Estimated time to finish: 19 hours, 20 minutes and 32.41 seconds
+[gpub005:0/64] 2023-07-08 15:29:58,046 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub005:0/64] 2023-07-08 15:30:15,652 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 15:30:18,988 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 15:30:18,988 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub005:0/64] 2023-07-08 15:30:18,994 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 15:35:04,256 (trainer:732) INFO: 27epoch:train:1-100batch: iter_time=1.647, forward_time=0.168, loss_ctc=78.316, loss_att=58.961, acc=0.688, loss=64.768, backward_time=1.042, grad_norm=106.995, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.185, optim0_lr0=7.070e-05, train_time=6.125
+[gpub005:0/64] 2023-07-08 15:37:21,077 (trainer:732) INFO: 27epoch:train:101-200batch: iter_time=1.137e-04, forward_time=0.146, loss_ctc=65.781, loss_att=51.624, acc=0.711, loss=55.871, backward_time=1.029, grad_norm=94.992, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.069e-05, train_time=2.736
+[gpub005:0/64] 2023-07-08 15:39:39,144 (trainer:732) INFO: 27epoch:train:201-300batch: iter_time=1.198e-04, forward_time=0.145, loss_ctc=68.849, loss_att=52.320, acc=0.684, loss=57.279, backward_time=1.027, grad_norm=92.602, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.067e-05, train_time=2.761
+[gpub005:0/64] 2023-07-08 15:41:55,639 (trainer:732) INFO: 27epoch:train:301-400batch: iter_time=1.117e-04, forward_time=0.147, loss_ctc=89.419, loss_att=68.861, acc=0.699, loss=75.029, backward_time=1.031, grad_norm=131.850, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.066e-05, train_time=2.730
+[gpub005:0/64] 2023-07-08 15:44:12,562 (trainer:732) INFO: 27epoch:train:401-500batch: iter_time=1.123e-04, forward_time=0.146, loss_ctc=71.286, loss_att=59.711, acc=0.692, loss=63.183, backward_time=1.032, grad_norm=96.921, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.065e-05, train_time=2.738
+[gpub005:0/64] 2023-07-08 15:46:35,966 (trainer:732) INFO: 27epoch:train:501-600batch: iter_time=1.069e-04, forward_time=0.144, loss_ctc=63.026, loss_att=47.249, acc=0.702, loss=51.982, backward_time=1.050, grad_norm=101.613, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.063e-05, train_time=2.868
+[gpub005:0/64] 2023-07-08 15:48:57,030 (trainer:732) INFO: 27epoch:train:601-700batch: iter_time=1.063e-04, forward_time=0.146, loss_ctc=78.906, loss_att=55.763, acc=0.690, loss=62.706, backward_time=1.033, grad_norm=105.517, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.062e-05, train_time=2.821
+[gpub005:0/64] 2023-07-08 15:51:15,464 (trainer:732) INFO: 27epoch:train:701-800batch: iter_time=1.760e-04, forward_time=0.160, loss_ctc=71.619, loss_att=57.821, acc=0.699, loss=61.960, backward_time=1.029, grad_norm=99.845, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.184, optim0_lr0=7.060e-05, train_time=2.768
+[gpub005:0/64] 2023-07-08 15:52:10,389 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpub005:0/64] 2023-07-08 15:52:28,184 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 15:52:31,555 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 15:52:31,555 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub005:0/64] 2023-07-08 15:52:31,561 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 15:56:21,957 (trainer:732) INFO: 27epoch:train:801-900batch: iter_time=1.505, forward_time=0.162, loss_ctc=78.800, loss_att=57.410, acc=0.692, loss=63.827, backward_time=1.046, grad_norm=114.718, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.184, optim0_lr0=7.059e-05, train_time=6.130
+[gpub005:0/64] 2023-07-08 15:58:38,204 (trainer:732) INFO: 27epoch:train:901-1000batch: iter_time=1.265e-04, forward_time=0.145, loss_ctc=70.277, loss_att=51.641, acc=0.699, loss=57.232, backward_time=1.026, grad_norm=108.240, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.058e-05, train_time=2.725
+[gpub005:0/64] 2023-07-08 16:00:54,182 (trainer:732) INFO: 27epoch:train:1001-1100batch: iter_time=1.284e-04, forward_time=0.146, loss_ctc=65.909, loss_att=52.213, acc=0.692, loss=56.322, backward_time=1.028, grad_norm=104.226, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.056e-05, train_time=2.719
+[gpub005:0/64] 2023-07-08 16:03:13,716 (trainer:732) INFO: 27epoch:train:1101-1200batch: iter_time=1.213e-04, forward_time=0.147, loss_ctc=75.613, loss_att=59.388, acc=0.694, loss=64.256, backward_time=1.033, grad_norm=118.002, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.055e-05, train_time=2.790
+[gpub005:0/64] 2023-07-08 16:05:29,735 (trainer:732) INFO: 27epoch:train:1201-1300batch: iter_time=1.145e-04, forward_time=0.147, loss_ctc=81.972, loss_att=64.720, acc=0.683, loss=69.896, backward_time=1.029, grad_norm=113.873, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.053e-05, train_time=2.720
+[gpub005:0/64] 2023-07-08 16:07:45,823 (trainer:732) INFO: 27epoch:train:1301-1400batch: iter_time=1.129e-04, forward_time=0.147, loss_ctc=66.096, loss_att=57.013, acc=0.672, loss=59.738, backward_time=1.028, grad_norm=94.201, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.052e-05, train_time=2.722
+[gpub005:0/64] 2023-07-08 16:10:01,331 (trainer:732) INFO: 27epoch:train:1401-1500batch: iter_time=1.266e-04, forward_time=0.146, loss_ctc=77.033, loss_att=53.359, acc=0.689, loss=60.461, backward_time=1.026, grad_norm=105.645, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.051e-05, train_time=2.710
+[gpub005:0/64] 2023-07-08 16:12:16,550 (trainer:732) INFO: 27epoch:train:1501-1600batch: iter_time=1.348e-04, forward_time=0.145, loss_ctc=64.493, loss_att=49.540, acc=0.689, loss=54.026, backward_time=1.024, grad_norm=104.991, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.049e-05, train_time=2.704
+[gpub005:0/64] 2023-07-08 16:13:52,617 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub005:0/64] 2023-07-08 16:14:10,903 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 16:14:14,392 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 16:14:14,392 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpub005:0/64] 2023-07-08 16:14:14,398 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 16:17:12,466 (trainer:732) INFO: 27epoch:train:1601-1700batch: iter_time=1.393, forward_time=0.146, loss_ctc=72.251, loss_att=57.571, acc=0.694, loss=61.975, backward_time=1.037, grad_norm=118.951, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.048e-05, train_time=5.918
+[gpub005:0/64] 2023-07-08 16:19:29,351 (trainer:732) INFO: 27epoch:train:1701-1800batch: iter_time=1.086e-04, forward_time=0.147, loss_ctc=72.839, loss_att=54.944, acc=0.702, loss=60.312, backward_time=1.033, grad_norm=123.622, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.046e-05, train_time=2.737
+[gpub005:0/64] 2023-07-08 16:21:45,108 (trainer:732) INFO: 27epoch:train:1801-1900batch: iter_time=1.087e-04, forward_time=0.145, loss_ctc=61.753, loss_att=49.031, acc=0.707, loss=52.847, backward_time=1.027, grad_norm=99.456, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.045e-05, train_time=2.715
+[gpub005:0/64] 2023-07-08 16:24:00,983 (trainer:732) INFO: 27epoch:train:1901-2000batch: iter_time=1.038e-04, forward_time=0.145, loss_ctc=79.566, loss_att=60.500, acc=0.699, loss=66.220, backward_time=1.028, grad_norm=103.999, clip=100.000, loss_scale=3.022e+23, optim_step_time=0.183, optim0_lr0=7.044e-05, train_time=2.717
+[gpub005:0/64] 2023-07-08 16:26:17,002 (trainer:732) INFO: 27epoch:train:2001-2100batch: iter_time=1.062e-04, forward_time=0.146, loss_ctc=79.269, loss_att=61.205, acc=0.700, loss=66.624, backward_time=1.029, grad_norm=134.425, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.042e-05, train_time=2.720
+[gpub005:0/64] 2023-07-08 16:28:33,016 (trainer:732) INFO: 27epoch:train:2101-2200batch: iter_time=1.090e-04, forward_time=0.146, loss_ctc=68.277, loss_att=59.362, acc=0.687, loss=62.036, backward_time=1.029, grad_norm=98.602, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.041e-05, train_time=2.720
+[gpub005:0/64] 2023-07-08 16:30:48,891 (trainer:732) INFO: 27epoch:train:2201-2300batch: iter_time=1.125e-04, forward_time=0.146, loss_ctc=75.472, loss_att=52.615, acc=0.695, loss=59.472, backward_time=1.028, grad_norm=107.785, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.039e-05, train_time=2.717
+[gpub005:0/64] 2023-07-08 16:33:04,669 (trainer:732) INFO: 27epoch:train:2301-2400batch: iter_time=1.164e-04, forward_time=0.146, loss_ctc=65.943, loss_att=50.458, acc=0.702, loss=55.104, backward_time=1.027, grad_norm=108.478, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.038e-05, train_time=2.715
+[gpub005:0/64] 2023-07-08 16:35:20,434 (trainer:732) INFO: 27epoch:train:2401-2500batch: iter_time=1.090e-04, forward_time=0.146, loss_ctc=73.144, loss_att=58.864, acc=0.697, loss=63.148, backward_time=1.027, grad_norm=94.710, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.037e-05, train_time=2.715
+[gpub005:0/64] 2023-07-08 16:35:23,306 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub005:0/64] 2023-07-08 16:35:41,386 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 16:35:44,939 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 16:35:44,939 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub005:0/64] 2023-07-08 16:35:44,946 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 16:41:04,598 (trainer:732) INFO: 27epoch:train:2501-2600batch: iter_time=1.263, forward_time=0.173, loss_ctc=80.201, loss_att=57.657, acc=0.706, loss=64.420, backward_time=1.041, grad_norm=123.403, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.184, optim0_lr0=7.035e-05, train_time=6.882
+[gpub005:0/64] 2023-07-08 16:43:21,200 (trainer:732) INFO: 27epoch:train:2601-2700batch: iter_time=1.166e-04, forward_time=0.148, loss_ctc=63.267, loss_att=46.459, acc=0.706, loss=51.501, backward_time=1.029, grad_norm=87.589, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.184, optim0_lr0=7.034e-05, train_time=2.732
+[gpub005:0/64] 2023-07-08 16:45:37,296 (trainer:732) INFO: 27epoch:train:2701-2800batch: iter_time=1.226e-04, forward_time=0.148, loss_ctc=68.250, loss_att=52.710, acc=0.702, loss=57.372, backward_time=1.030, grad_norm=105.430, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.184, optim0_lr0=7.032e-05, train_time=2.722
+[gpub005:0/64] 2023-07-08 16:47:53,627 (trainer:732) INFO: 27epoch:train:2801-2900batch: iter_time=1.205e-04, forward_time=0.148, loss_ctc=82.041, loss_att=65.147, acc=0.699, loss=70.215, backward_time=1.032, grad_norm=109.986, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.031e-05, train_time=2.726
+[gpub005:0/64] 2023-07-08 16:50:09,459 (trainer:732) INFO: 27epoch:train:2901-3000batch: iter_time=1.224e-04, forward_time=0.148, loss_ctc=73.529, loss_att=60.131, acc=0.703, loss=64.150, backward_time=1.028, grad_norm=96.408, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.030e-05, train_time=2.716
+[gpub005:0/64] 2023-07-08 16:52:25,206 (trainer:732) INFO: 27epoch:train:3001-3100batch: iter_time=1.059e-04, forward_time=0.147, loss_ctc=60.358, loss_att=49.299, acc=0.695, loss=52.616, backward_time=1.028, grad_norm=102.512, clip=100.000, loss_scale=6.045e+23,
optim_step_time=0.183, optim0_lr0=7.028e-05, train_time=2.715 +[gpub005:0/64] 2023-07-08 16:54:41,021 (trainer:732) INFO: 27epoch:train:3101-3200batch: iter_time=1.048e-04, forward_time=0.148, loss_ctc=78.368, loss_att=54.425, acc=0.688, loss=61.608, backward_time=1.028, grad_norm=94.380, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.027e-05, train_time=2.716 +[gpub005:0/64] 2023-07-08 16:56:56,755 (trainer:732) INFO: 27epoch:train:3201-3300batch: iter_time=1.043e-04, forward_time=0.147, loss_ctc=68.893, loss_att=56.211, acc=0.703, loss=60.016, backward_time=1.027, grad_norm=106.588, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.026e-05, train_time=2.714 +[gpub005:0/64] 2023-07-08 16:57:52,426 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub005:0/64] 2023-07-08 16:58:10,903 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 16:58:14,326 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 16:58:14,326 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub005:0/64] 2023-07-08 16:58:14,332 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 17:03:42,753 (trainer:732) INFO: 27epoch:train:3301-3400batch: iter_time=1.708, forward_time=0.146, loss_ctc=79.437, loss_att=60.997, acc=0.686, loss=66.529, backward_time=1.042, grad_norm=148.834, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.024e-05, train_time=8.120 +[gpub005:0/64] 2023-07-08 17:05:58,787 (trainer:732) INFO: 27epoch:train:3401-3500batch: iter_time=1.422e-04, forward_time=0.145, loss_ctc=63.091, loss_att=46.527, acc=0.710, loss=51.497, backward_time=1.026, grad_norm=97.048, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.023e-05, train_time=2.720 +[gpub005:0/64] 2023-07-08 17:08:14,662 (trainer:732) INFO: 27epoch:train:3501-3600batch: iter_time=1.409e-04, forward_time=0.146, loss_ctc=65.612, loss_att=50.551, acc=0.702, loss=55.069, backward_time=1.028, grad_norm=90.485, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.021e-05, train_time=2.717 +[gpub005:0/64] 2023-07-08 17:10:30,437 (trainer:732) INFO: 27epoch:train:3601-3700batch: iter_time=1.452e-04, forward_time=0.145, loss_ctc=86.543, loss_att=66.397, acc=0.689, loss=72.441, backward_time=1.027, grad_norm=123.487, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.020e-05, train_time=2.715 +[gpub005:0/64] 2023-07-08 17:12:46,222 (trainer:732) INFO: 27epoch:train:3701-3800batch: iter_time=1.172e-04, forward_time=0.146, loss_ctc=72.226, loss_att=59.576, acc=0.683, loss=63.371, backward_time=1.029, grad_norm=110.341, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.019e-05, train_time=2.715 +[gpub005:0/64] 2023-07-08 17:15:01,954 (trainer:732) INFO: 27epoch:train:3801-3900batch: iter_time=1.262e-04, forward_time=0.147, 
loss_ctc=62.765, loss_att=50.118, acc=0.692, loss=53.912, backward_time=1.026, grad_norm=108.059, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.017e-05, train_time=2.714 +[gpub005:0/64] 2023-07-08 17:17:17,653 (trainer:732) INFO: 27epoch:train:3901-4000batch: iter_time=1.348e-04, forward_time=0.146, loss_ctc=79.044, loss_att=57.038, acc=0.681, loss=63.640, backward_time=1.027, grad_norm=113.104, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.016e-05, train_time=2.714 +[gpub005:0/64] 2023-07-08 17:19:33,486 (trainer:732) INFO: 27epoch:train:4001-4100batch: iter_time=1.284e-04, forward_time=0.146, loss_ctc=67.698, loss_att=54.565, acc=0.698, loss=58.505, backward_time=1.027, grad_norm=87.633, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.014e-05, train_time=2.716 +[gpub005:0/64] 2023-07-08 17:21:07,242 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub005:0/64] 2023-07-08 17:21:25,710 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 17:21:29,182 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 17:21:29,182 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub005:0/64] 2023-07-08 17:21:29,188 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 17:25:07,290 (trainer:732) INFO: 27epoch:train:4101-4200batch: iter_time=1.312, forward_time=0.147, loss_ctc=74.376, loss_att=54.791, acc=0.701, loss=60.666, backward_time=1.035, grad_norm=132.213, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.013e-05, train_time=6.676 +[gpub005:0/64] 2023-07-08 17:27:23,791 (trainer:732) INFO: 27epoch:train:4201-4300batch: iter_time=1.339e-04, forward_time=0.147, loss_ctc=70.773, loss_att=52.734, acc=0.694, loss=58.146, backward_time=1.029, grad_norm=107.084, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.012e-05, train_time=2.730 +[gpub005:0/64] 2023-07-08 17:29:39,208 (trainer:732) INFO: 27epoch:train:4301-4400batch: iter_time=1.474e-04, forward_time=0.146, loss_ctc=64.956, loss_att=51.106, acc=0.710, loss=55.261, backward_time=1.025, grad_norm=85.824, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.010e-05, train_time=2.708 +[gpub005:0/64] 2023-07-08 17:31:59,903 (trainer:732) INFO: 27epoch:train:4401-4500batch: iter_time=1.395e-04, forward_time=0.147, loss_ctc=68.008, loss_att=54.043, acc=0.690, loss=58.233, backward_time=1.031, grad_norm=87.203, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.009e-05, train_time=2.814 +[gpub005:0/64] 2023-07-08 17:34:15,707 (trainer:732) INFO: 27epoch:train:4501-4600batch: iter_time=1.354e-04, forward_time=0.147, loss_ctc=83.914, loss_att=63.789, acc=0.692, loss=69.826, backward_time=1.027, grad_norm=114.171, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.008e-05, train_time=2.716 
+[gpub005:0/64] 2023-07-08 17:36:31,380 (trainer:732) INFO: 27epoch:train:4601-4700batch: iter_time=1.455e-04, forward_time=0.148, loss_ctc=69.598, loss_att=58.292, acc=0.684, loss=61.684, backward_time=1.026, grad_norm=93.514, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.006e-05, train_time=2.713
+[gpub005:0/64] 2023-07-08 17:38:47,070 (trainer:732) INFO: 27epoch:train:4701-4800batch: iter_time=1.468e-04, forward_time=0.148, loss_ctc=62.231, loss_att=46.924, acc=0.696, loss=51.516, backward_time=1.027, grad_norm=93.136, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.005e-05, train_time=2.714
+[gpub005:0/64] 2023-07-08 17:41:07,645 (trainer:732) INFO: 27epoch:train:4801-4900batch: iter_time=1.199e-04, forward_time=0.146, loss_ctc=74.797, loss_att=53.529, acc=0.688, loss=59.909, backward_time=1.028, grad_norm=106.798, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.003e-05, train_time=2.811
+[gpub005:0/64] 2023-07-08 17:43:23,525 (trainer:732) INFO: 27epoch:train:4901-5000batch: iter_time=9.656e-05, forward_time=0.145, loss_ctc=69.547, loss_att=55.357, acc=0.699, loss=59.614, backward_time=1.027, grad_norm=88.177, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.002e-05, train_time=2.717
+[gpub005:0/64] 2023-07-08 17:43:26,168 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub005:0/64] 2023-07-08 17:43:44,478 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 17:43:47,904 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-08 17:43:47,905 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpub005:0/64] 2023-07-08 17:43:47,911 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 17:48:43,183 (trainer:732) INFO: 27epoch:train:5001-5100batch: iter_time=1.300, forward_time=0.156, loss_ctc=81.007, loss_att=59.315, acc=0.698, loss=65.823, backward_time=1.042, grad_norm=126.574, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=7.001e-05, train_time=6.393
+[gpub005:0/64] 2023-07-08 17:50:59,355 (trainer:732) INFO: 27epoch:train:5101-5200batch: iter_time=1.186e-04, forward_time=0.146, loss_ctc=63.117, loss_att=45.507, acc=0.712, loss=50.790, backward_time=1.029, grad_norm=105.642, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=6.999e-05, train_time=2.723
+[gpub005:0/64] 2023-07-08 17:53:15,220 (trainer:732) INFO: 27epoch:train:5201-5300batch: iter_time=1.242e-04, forward_time=0.146, loss_ctc=66.674, loss_att=51.779, acc=0.705, loss=56.248, backward_time=1.028, grad_norm=93.017, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=6.998e-05, train_time=2.717
+[gpub005:0/64] 2023-07-08 17:55:31,355 (trainer:732) INFO: 27epoch:train:5301-5400batch: iter_time=1.277e-04, forward_time=0.145, loss_ctc=80.758, loss_att=65.051, acc=0.690, loss=69.763, backward_time=1.028, grad_norm=115.695, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=6.997e-05, train_time=2.722
+[gpub005:0/64] 2023-07-08 17:57:46,927 (trainer:732) INFO: 27epoch:train:5401-5500batch: iter_time=1.098e-04, forward_time=0.144, loss_ctc=74.950, loss_att=60.692, acc=0.688, loss=64.970, backward_time=1.026, grad_norm=100.041, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=6.995e-05, train_time=2.711
+[gpub005:0/64] 2023-07-08 18:00:02,552 (trainer:732) INFO: 27epoch:train:5501-5600batch: iter_time=1.159e-04, forward_time=0.146, loss_ctc=60.364, loss_att=48.962, acc=0.693, loss=52.383, backward_time=1.025, grad_norm=89.373, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=6.994e-05, train_time=2.712
+[gpub005:0/64] 2023-07-08 18:02:18,130 (trainer:732) INFO: 27epoch:train:5601-5700batch: iter_time=1.266e-04, forward_time=0.145, loss_ctc=77.107, loss_att=53.744, acc=0.684, loss=60.753, backward_time=1.027, grad_norm=102.369, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=6.992e-05, train_time=2.711
+[gpub005:0/64] 2023-07-08 18:04:33,692 (trainer:732) INFO: 27epoch:train:5701-5800batch: iter_time=1.235e-04, forward_time=0.146, loss_ctc=67.366, loss_att=53.893, acc=0.700, loss=57.935, backward_time=1.026, grad_norm=103.715, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=6.991e-05, train_time=2.711
+[gpub005:0/64] 2023-07-08 18:05:27,389 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub005:0/64] 2023-07-08 18:05:45,505 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 18:05:48,945 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-08 18:05:48,946 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub005:0/64] 2023-07-08 18:05:48,952 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 18:09:46,628 (trainer:732) INFO: 27epoch:train:5801-5900batch: iter_time=1.481, forward_time=0.206, loss_ctc=77.422, loss_att=54.274, acc=0.707, loss=61.219, backward_time=1.053, grad_norm=108.922, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.187, optim0_lr0=6.990e-05, train_time=6.259
+[gpub005:0/64] 2023-07-08 18:12:03,207 (trainer:732) INFO: 27epoch:train:5901-6000batch: iter_time=1.133e-04, forward_time=0.147, loss_ctc=67.668, loss_att=49.007, acc=0.719, loss=54.605, backward_time=1.029, grad_norm=105.149, clip=100.000, loss_scale=6.045e+23, optim_step_time=0.183, optim0_lr0=6.988e-05, train_time=2.731
+[gpub005:0/64] 2023-07-08 18:14:18,983 (trainer:732) INFO: 27epoch:train:6001-6100batch: iter_time=1.019e-04, forward_time=0.146, loss_ctc=65.649, loss_att=52.644, acc=0.704, loss=56.546, backward_time=1.029, grad_norm=82.415, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.987e-05, train_time=2.715
+[gpub005:0/64] 2023-07-08 18:16:35,266 (trainer:732) INFO: 27epoch:train:6101-6200batch: iter_time=1.118e-04, forward_time=0.147, loss_ctc=74.745, loss_att=57.661, acc=0.710, loss=62.786, backward_time=1.032, grad_norm=103.839, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.986e-05, train_time=2.725
+[gpub005:0/64] 2023-07-08 18:18:51,419 (trainer:732) INFO: 27epoch:train:6201-6300batch: iter_time=1.098e-04, forward_time=0.147, loss_ctc=80.242, loss_att=63.110, acc=0.705, loss=68.249, backward_time=1.031, grad_norm=123.740, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.984e-05, train_time=2.723
+[gpub005:0/64] 2023-07-08 18:21:07,449 (trainer:732) INFO: 27epoch:train:6301-6400batch: iter_time=1.090e-04, forward_time=0.147, loss_ctc=64.917, loss_att=56.070, acc=0.689, loss=58.724, backward_time=1.029, grad_norm=94.389, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.983e-05, train_time=2.720
+[gpub005:0/64] 2023-07-08 18:23:23,293 (trainer:732) INFO: 27epoch:train:6401-6500batch: iter_time=1.077e-04, forward_time=0.146, loss_ctc=75.776, loss_att=53.223, acc=0.701, loss=59.989, backward_time=1.026, grad_norm=128.056, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.982e-05, train_time=2.717
+[gpub005:0/64] 2023-07-08 18:25:41,985 (trainer:732) INFO: 27epoch:train:6501-6600batch: iter_time=1.252e-04, forward_time=0.146, loss_ctc=63.831, loss_att=48.758, acc=0.705, loss=53.280, backward_time=1.030, grad_norm=111.720, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.980e-05, train_time=2.774
+[gpub005:0/64] 2023-07-08 18:27:20,934 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub005:0/64] 2023-07-08 18:27:39,116 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 18:27:42,618 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-08 18:27:42,618 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub005:0/64] 2023-07-08 18:27:42,625 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 18:31:54,283 (trainer:732) INFO: 27epoch:train:6601-6700batch: iter_time=1.661, forward_time=0.168, loss_ctc=78.094, loss_att=55.859, acc=0.715, loss=62.530, backward_time=1.038, grad_norm=110.280, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.979e-05, train_time=7.445
+[gpub005:0/64] 2023-07-08 18:34:11,650 (trainer:732) INFO: 27epoch:train:6701-6800batch: iter_time=1.183e-04, forward_time=0.147, loss_ctc=71.392, loss_att=52.314, acc=0.710, loss=58.037, backward_time=1.032, grad_norm=109.454, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.977e-05, train_time=2.748
+[gpub005:0/64] 2023-07-08 18:36:27,985 (trainer:732) INFO: 27epoch:train:6801-6900batch: iter_time=1.155e-04, forward_time=0.147, loss_ctc=64.101, loss_att=51.370, acc=0.716, loss=55.189, backward_time=1.029, grad_norm=109.963, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.976e-05, train_time=2.726
+[gpub005:0/64] 2023-07-08 18:38:44,206 (trainer:732) INFO: 27epoch:train:6901-7000batch: iter_time=1.189e-04, forward_time=0.145, loss_ctc=68.462, loss_att=54.516, acc=0.697, loss=58.700, backward_time=1.029, grad_norm=85.796, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.975e-05, train_time=2.724
+[gpub005:0/64] 2023-07-08 18:41:00,068 (trainer:732) INFO: 27epoch:train:7001-7100batch: iter_time=1.081e-04, forward_time=0.146, loss_ctc=84.776, loss_att=62.796, acc=0.708, loss=69.390, backward_time=1.027, grad_norm=105.049, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.973e-05, train_time=2.717
+[gpub005:0/64] 2023-07-08 18:43:15,964 (trainer:732) INFO: 27epoch:train:7101-7200batch: iter_time=1.084e-04, forward_time=0.147, loss_ctc=70.156, loss_att=58.438, acc=0.703, loss=61.953, backward_time=1.028, grad_norm=91.886, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.972e-05, train_time=2.718
+[gpub005:0/64] 2023-07-08 18:45:31,769 (trainer:732) INFO: 27epoch:train:7201-7300batch: iter_time=1.110e-04, forward_time=0.147, loss_ctc=62.426, loss_att=47.189, acc=0.712, loss=51.760, backward_time=1.027, grad_norm=99.793, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.971e-05, train_time=2.716
+[gpub005:0/64] 2023-07-08 18:47:47,563 (trainer:732) INFO: 27epoch:train:7301-7400batch: iter_time=1.171e-04, forward_time=0.146, loss_ctc=74.215, loss_att=52.281, acc=0.700, loss=58.861, backward_time=1.029, grad_norm=103.237, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.969e-05, train_time=2.716
+[gpub005:0/64] 2023-07-08 18:50:03,368 (trainer:732) INFO: 27epoch:train:7401-7500batch: iter_time=1.113e-04, forward_time=0.147, loss_ctc=69.100, loss_att=56.675, acc=0.701, loss=60.403, backward_time=1.028, grad_norm=93.451, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.968e-05, train_time=2.716
+[gpub005:0/64] 2023-07-08 18:50:06,411 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub005:0/64] 2023-07-08 18:50:24,992 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 18:50:28,681 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-08 18:50:28,681 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub005:0/64] 2023-07-08 18:50:28,687 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 18:56:41,552 (trainer:732) INFO: 27epoch:train:7501-7600batch: iter_time=1.468, forward_time=0.146, loss_ctc=79.702, loss_att=58.690, acc=0.704, loss=64.993, backward_time=1.043, grad_norm=117.599, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.967e-05, train_time=7.963
+[gpub005:0/64] 2023-07-08 18:58:59,435 (trainer:732) INFO: 27epoch:train:7601-7700batch: iter_time=1.165e-04, forward_time=0.146, loss_ctc=62.698, loss_att=45.702, acc=0.715, loss=50.801, backward_time=1.030, grad_norm=86.375, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.965e-05, train_time=2.758
+[gpub005:0/64] 2023-07-08 19:01:20,171 (trainer:732) INFO: 27epoch:train:7701-7800batch: iter_time=1.161e-04, forward_time=0.145, loss_ctc=68.524, loss_att=51.985, acc=0.704, loss=56.947, backward_time=1.041, grad_norm=108.704, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.964e-05, train_time=2.815
+[gpub005:0/64] 2023-07-08 19:03:36,317 (trainer:732) INFO: 27epoch:train:7801-7900batch: iter_time=1.081e-04, forward_time=0.147, loss_ctc=82.146, loss_att=67.111, acc=0.692, loss=71.621, backward_time=1.030, grad_norm=116.046, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.963e-05, train_time=2.723
+[gpub005:0/64] 2023-07-08 19:05:52,107 (trainer:732) INFO: 27epoch:train:7901-8000batch: iter_time=1.352e-04, forward_time=0.146, loss_ctc=71.994, loss_att=57.649, acc=0.695, loss=61.952, backward_time=1.025, grad_norm=131.857, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.961e-05, train_time=2.716
+[gpub005:0/64] 2023-07-08 19:08:09,009 (trainer:732) INFO: 27epoch:train:8001-8100batch: iter_time=1.230e-04, forward_time=0.147, loss_ctc=61.132, loss_att=49.614, acc=0.689, loss=53.069, backward_time=1.032, grad_norm=103.490, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.960e-05, train_time=2.738
+[gpub005:0/64] 2023-07-08 19:10:25,679 (trainer:732) INFO: 27epoch:train:8101-8200batch: iter_time=1.177e-04, forward_time=0.144, loss_ctc=77.024, loss_att=54.171, acc=0.687, loss=61.027, backward_time=1.027, grad_norm=100.882, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.959e-05, train_time=2.733
+[gpub005:0/64] 2023-07-08 19:12:41,248 (trainer:732) INFO: 27epoch:train:8201-8300batch: iter_time=1.286e-04, forward_time=0.146, loss_ctc=66.224, loss_att=53.289, acc=0.703, loss=57.170, backward_time=1.025, grad_norm=103.207, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.957e-05, train_time=2.711
+[gpub005:0/64] 2023-07-08 19:13:32,010 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub005:0/64] 2023-07-08 19:13:50,361 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 19:13:54,092 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-08 19:13:54,092 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpub005:0/64] 2023-07-08 19:13:54,099 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 19:19:15,593 (trainer:732) INFO: 27epoch:train:8301-8400batch: iter_time=1.514, forward_time=0.147, loss_ctc=77.360, loss_att=60.550, acc=0.688, loss=65.593, backward_time=1.044, grad_norm=115.246, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.956e-05, train_time=7.887
+[gpub005:0/64] 2023-07-08 19:21:32,453 (trainer:732) INFO: 27epoch:train:8401-8500batch: iter_time=1.105e-04, forward_time=0.145, loss_ctc=63.582, loss_att=45.791, acc=0.714, loss=51.128, backward_time=1.026, grad_norm=87.132, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.954e-05, train_time=2.737
+[gpub005:0/64] 2023-07-08 19:23:49,755 (trainer:732) INFO: 27epoch:train:8501-8600batch: iter_time=1.181e-04, forward_time=0.145, loss_ctc=68.442, loss_att=51.771, acc=0.700, loss=56.772, backward_time=1.029, grad_norm=94.365, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.953e-05, train_time=2.746
+[gpub005:0/64] 2023-07-08 19:26:06,069 (trainer:732) INFO: 27epoch:train:8601-8700batch: iter_time=1.217e-04, forward_time=0.145, loss_ctc=82.786, loss_att=63.906, acc=0.692, loss=69.570, backward_time=1.027, grad_norm=126.559, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.952e-05, train_time=2.726
+[gpub005:0/64] 2023-07-08 19:28:22,597 (trainer:732) INFO: 27epoch:train:8701-8800batch: iter_time=1.244e-04, forward_time=0.147, loss_ctc=70.784, loss_att=58.190, acc=0.689, loss=61.968, backward_time=1.029, grad_norm=121.368, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.184, optim0_lr0=6.950e-05, train_time=2.730
+[gpub005:0/64] 2023-07-08 19:30:38,374 (trainer:732) INFO: 27epoch:train:8801-8900batch: iter_time=1.201e-04, forward_time=0.147, loss_ctc=60.373, loss_att=48.428, acc=0.694, loss=52.011, backward_time=1.028, grad_norm=99.372, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.184, optim0_lr0=6.949e-05, train_time=2.715
+[gpub005:0/64] 2023-07-08 19:32:53,800 (trainer:732) INFO: 27epoch:train:8901-9000batch: iter_time=1.115e-04, forward_time=0.145, loss_ctc=76.119, loss_att=54.793, acc=0.685, loss=61.191, backward_time=1.026, grad_norm=101.698, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.948e-05, train_time=2.708
+[gpub005:0/64] 2023-07-08 19:35:09,452 (trainer:732) INFO: 27epoch:train:9001-9100batch: iter_time=1.152e-04, forward_time=0.146, loss_ctc=67.817, loss_att=54.263, acc=0.701, loss=58.329, backward_time=1.028, grad_norm=89.440, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.946e-05, train_time=2.713
+[gpub005:0/64] 2023-07-08 19:36:41,580 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub005:0/64] 2023-07-08 19:36:59,716 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 19:37:03,195 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-08 19:37:03,195 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub005:0/64] 2023-07-08 19:37:03,201 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 19:41:40,242 (trainer:732) INFO: 27epoch:train:9101-9200batch: iter_time=1.268, forward_time=0.157, loss_ctc=74.223, loss_att=53.143, acc=0.708, loss=59.467, backward_time=1.037, grad_norm=111.466, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.184, optim0_lr0=6.945e-05, train_time=7.816
+[gpub005:0/64] 2023-07-08 19:43:56,604 (trainer:732) INFO: 27epoch:train:9201-9300batch: iter_time=1.297e-04, forward_time=0.146, loss_ctc=71.313, loss_att=52.868, acc=0.710, loss=58.401, backward_time=1.029, grad_norm=113.150, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.944e-05, train_time=2.727
+[gpub005:0/64] 2023-07-08 19:46:13,420 (trainer:732) INFO: 27epoch:train:9301-9400batch: iter_time=1.309e-04, forward_time=0.145, loss_ctc=64.237, loss_att=50.871, acc=0.717, loss=54.881, backward_time=1.026, grad_norm=98.725, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.942e-05, train_time=2.736
+[gpub005:0/64] 2023-07-08 19:48:30,885 (trainer:732) INFO: 27epoch:train:9401-9500batch: iter_time=1.306e-04, forward_time=0.145, loss_ctc=67.251, loss_att=53.854, acc=0.700, loss=57.873, backward_time=1.028, grad_norm=86.885, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.941e-05, train_time=2.749
+[gpub005:0/64] 2023-07-08 19:50:47,871 (trainer:732) INFO: 27epoch:train:9501-9600batch: iter_time=1.344e-04, forward_time=0.146, loss_ctc=82.998, loss_att=63.271, acc=0.704, loss=69.189, backward_time=1.029, grad_norm=100.681, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.940e-05, train_time=2.740
+[gpub005:0/64] 2023-07-08 19:53:04,040 (trainer:732) INFO: 27epoch:train:9601-9700batch: iter_time=1.091e-04, forward_time=0.147, loss_ctc=69.726, loss_att=58.788, acc=0.700, loss=62.069, backward_time=1.030, grad_norm=100.765, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.938e-05, train_time=2.723
+[gpub005:0/64] 2023-07-08 19:55:19,699 (trainer:732) INFO: 27epoch:train:9701-9800batch: iter_time=1.084e-04, forward_time=0.147, loss_ctc=62.256, loss_att=46.889, acc=0.713, loss=51.499, backward_time=1.028, grad_norm=99.634, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.937e-05, train_time=2.713
+[gpub005:0/64] 2023-07-08 19:57:35,506 (trainer:732) INFO: 27epoch:train:9801-9900batch: iter_time=1.236e-04, forward_time=0.146, loss_ctc=73.700, loss_att=52.435, acc=0.704, loss=58.814, backward_time=1.028, grad_norm=105.162, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.936e-05, train_time=2.716
+[gpub005:0/64] 2023-07-08 19:59:50,966 (trainer:732) INFO: 27epoch:train:9901-10000batch: iter_time=1.228e-04, forward_time=0.145, loss_ctc=69.244, loss_att=56.460, acc=0.701, loss=60.295, backward_time=1.026, grad_norm=95.619, clip=100.000, loss_scale=1.209e+24, optim_step_time=0.183, optim0_lr0=6.934e-05, train_time=2.709
+[gpub005:0/64] 2023-07-08 20:13:31,090 (trainer:338) INFO: 27epoch results: [train] iter_time=0.175, forward_time=0.148, loss_ctc=71.614, loss_att=55.101, acc=0.698, loss=60.055, backward_time=1.030, grad_norm=105.167, clip=100.000, loss_scale=7.858e+23, optim_step_time=0.183, optim0_lr0=7.002e-05, train_time=3.238, time=4 hours, 30 minutes and 8.18 seconds, total_count=240000, gpu_max_cached_mem_GB=38.234, [valid] loss_ctc=48.741, cer_ctc=0.274, loss_att=40.713, acc=0.655, cer=0.437, wer=1.000, loss=43.121, time=7 minutes and 6.84 seconds, total_count=24794, gpu_max_cached_mem_GB=38.234, [att_plot] time=6 minutes and 18.07 seconds, total_count=0, gpu_max_cached_mem_GB=38.234
+[gpub005:0/64] 2023-07-08 20:13:46,534 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub005:0/64] 2023-07-08 20:13:46,675 (trainer:272) INFO: 28/30epoch started. Estimated time to finish: 14 hours, 26 minutes and 36.64 seconds
+[gpub005:0/64] 2023-07-08 20:13:46,742 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub005:0/64] 2023-07-08 20:14:06,778 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 20:14:10,729 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpub005:0/64] 2023-07-08 20:14:10,729 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpub005:0/64] 2023-07-08 20:14:10,758 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 20:20:40,880 (trainer:732) INFO: 28epoch:train:1-100batch: iter_time=2.719, forward_time=0.161, loss_ctc=74.009, loss_att=55.912, acc=0.677, loss=61.341, backward_time=1.048, grad_norm=106.864, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.187, optim0_lr0=6.933e-05, train_time=8.284
+[gpub005:0/64] 2023-07-08 20:22:58,189 (trainer:732) INFO: 28epoch:train:101-200batch: iter_time=1.283e-04, forward_time=0.146, loss_ctc=77.783, loss_att=59.524, acc=0.688, loss=65.002, backward_time=1.028, grad_norm=110.041, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.182, optim0_lr0=6.932e-05, train_time=2.746
+[gpub005:0/64] 2023-07-08 20:25:14,175 (trainer:732) INFO: 28epoch:train:201-300batch: iter_time=1.274e-04, forward_time=0.146, loss_ctc=78.368, loss_att=57.773, acc=0.689, loss=63.952, backward_time=1.027, grad_norm=110.026, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.930e-05, train_time=2.720
+[gpub005:0/64] 2023-07-08 20:27:33,894 (trainer:732) INFO: 28epoch:train:301-400batch: iter_time=1.215e-04, forward_time=0.166, loss_ctc=86.210, loss_att=62.958, acc=0.681, loss=69.933, backward_time=1.033, grad_norm=125.836, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.185, optim0_lr0=6.929e-05, train_time=2.794
+[gpub005:0/64] 2023-07-08 20:29:54,389 (trainer:732) INFO: 28epoch:train:401-500batch: iter_time=1.213e-04, forward_time=0.144, loss_ctc=68.699, loss_att=49.263, acc=0.703, loss=55.094, backward_time=1.030, grad_norm=113.824, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.182, optim0_lr0=6.928e-05, train_time=2.810
+[gpub005:0/64] 2023-07-08 20:32:17,226 (trainer:732) INFO: 28epoch:train:501-600batch: iter_time=1.182e-04, forward_time=0.145, loss_ctc=56.148, loss_att=39.661, acc=0.713, loss=44.607, backward_time=1.035, grad_norm=83.985, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.926e-05, train_time=2.857
+[gpub005:0/64] 2023-07-08 20:34:34,235 (trainer:732) INFO: 28epoch:train:601-700batch: iter_time=1.286e-04, forward_time=0.144, loss_ctc=72.009, loss_att=54.099, acc=0.698, loss=59.472, backward_time=1.032, grad_norm=134.899, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.925e-05, train_time=2.740
+[gpub005:0/64] 2023-07-08 20:36:55,642 (trainer:732) INFO: 28epoch:train:701-800batch: iter_time=1.265e-04, forward_time=0.144, loss_ctc=71.921, loss_att=54.806, acc=0.689, loss=59.941, backward_time=1.031, grad_norm=100.285, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.182, optim0_lr0=6.924e-05, train_time=2.828
+[gpub005:0/64] 2023-07-08 20:37:44,836 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpub005:0/64] 2023-07-08 20:38:02,541 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 20:38:06,124 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 20:38:06,124 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub005:0/64] 2023-07-08 20:38:06,130 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 20:41:59,143 (trainer:732) INFO: 28epoch:train:801-900batch: iter_time=1.278, forward_time=0.167, loss_ctc=83.852, loss_att=67.054, acc=0.679, loss=72.094, backward_time=1.041, grad_norm=123.747, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.922e-05, train_time=6.070 +[gpub005:0/64] 2023-07-08 20:44:16,329 (trainer:732) INFO: 28epoch:train:901-1000batch: iter_time=1.260e-04, forward_time=0.146, loss_ctc=72.535, loss_att=53.389, acc=0.704, loss=59.133, backward_time=1.032, grad_norm=104.932, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.921e-05, train_time=2.743 +[gpub005:0/64] 2023-07-08 20:46:32,836 (trainer:732) INFO: 28epoch:train:1001-1100batch: iter_time=1.159e-04, forward_time=0.146, loss_ctc=81.503, loss_att=62.255, acc=0.693, loss=68.029, backward_time=1.030, grad_norm=104.731, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.920e-05, train_time=2.730 +[gpub005:0/64] 2023-07-08 20:48:49,066 (trainer:732) INFO: 28epoch:train:1101-1200batch: iter_time=1.186e-04, forward_time=0.145, loss_ctc=82.031, loss_att=62.754, acc=0.695, loss=68.537, backward_time=1.030, grad_norm=126.182, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.918e-05, train_time=2.724 +[gpub005:0/64] 2023-07-08 20:51:04,740 (trainer:732) INFO: 28epoch:train:1201-1300batch: iter_time=1.241e-04, forward_time=0.145, loss_ctc=68.969, loss_att=49.412, acc=0.698, loss=55.279, backward_time=1.027, grad_norm=110.693, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.182, optim0_lr0=6.917e-05, train_time=2.713 +[gpub005:0/64] 2023-07-08 20:53:20,512 (trainer:732) INFO: 28epoch:train:1301-1400batch: iter_time=1.139e-04, forward_time=0.145, loss_ctc=63.955, loss_att=43.837, acc=0.724, loss=49.873, backward_time=1.027, grad_norm=86.116, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.916e-05, train_time=2.715 +[gpub005:0/64] 2023-07-08 20:55:36,243 (trainer:732) INFO: 28epoch:train:1401-1500batch: iter_time=1.207e-04, forward_time=0.145, loss_ctc=62.174, loss_att=44.982, acc=0.721, loss=50.140, backward_time=1.029, grad_norm=99.535, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.182, optim0_lr0=6.914e-05, train_time=2.714 +[gpub005:0/64] 2023-07-08 20:57:52,044 (trainer:732) INFO: 28epoch:train:1501-1600batch: iter_time=1.179e-04, forward_time=0.145, loss_ctc=75.891, loss_att=56.730, acc=0.703, loss=62.479, backward_time=1.029, grad_norm=135.198, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.182, optim0_lr0=6.913e-05, 
train_time=2.716 +[gpub005:0/64] 2023-07-08 20:59:33,182 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub005:0/64] 2023-07-08 20:59:51,725 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 20:59:55,498 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 20:59:55,498 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub005:0/64] 2023-07-08 20:59:55,504 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 21:03:46,841 (trainer:732) INFO: 28epoch:train:1601-1700batch: iter_time=1.392, forward_time=0.151, loss_ctc=80.220, loss_att=63.955, acc=0.696, loss=68.834, backward_time=1.039, grad_norm=127.333, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.189, optim0_lr0=6.912e-05, train_time=7.095 +[gpub005:0/64] 2023-07-08 21:06:03,098 (trainer:732) INFO: 28epoch:train:1701-1800batch: iter_time=1.185e-04, forward_time=0.146, loss_ctc=72.712, loss_att=55.287, acc=0.677, loss=60.514, backward_time=1.029, grad_norm=100.144, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.182, optim0_lr0=6.910e-05, train_time=2.726 +[gpub005:0/64] 2023-07-08 21:08:19,000 (trainer:732) INFO: 28epoch:train:1801-1900batch: iter_time=1.228e-04, forward_time=0.144, loss_ctc=76.947, loss_att=57.906, acc=0.692, loss=63.618, backward_time=1.027, grad_norm=115.473, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.909e-05, train_time=2.718 +[gpub005:0/64] 2023-07-08 21:10:35,710 (trainer:732) INFO: 28epoch:train:1901-2000batch: iter_time=1.219e-04, forward_time=0.147, loss_ctc=76.026, loss_att=56.343, acc=0.691, loss=62.248, backward_time=1.031, grad_norm=107.646, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.908e-05, train_time=2.734 +[gpub005:0/64] 2023-07-08 21:12:54,818 (trainer:732) INFO: 28epoch:train:2001-2100batch: iter_time=1.131e-04, forward_time=0.145, loss_ctc=83.541, loss_att=60.894, acc=0.684, loss=67.688, backward_time=1.030, grad_norm=115.416, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.907e-05, train_time=2.782 +[gpub005:0/64] 2023-07-08 21:15:20,099 (trainer:732) INFO: 28epoch:train:2101-2200batch: iter_time=1.214e-04, forward_time=0.145, loss_ctc=67.313, loss_att=48.446, acc=0.713, loss=54.106, backward_time=1.040, grad_norm=89.259, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.905e-05, train_time=2.905 +[gpub005:0/64] 2023-07-08 21:17:35,563 (trainer:732) INFO: 28epoch:train:2201-2300batch: iter_time=1.140e-04, forward_time=0.145, loss_ctc=54.785, loss_att=40.768, acc=0.708, loss=44.973, backward_time=1.026, grad_norm=106.678, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.904e-05, train_time=2.709 +[gpub005:0/64] 2023-07-08 21:19:51,268 (trainer:732) INFO: 28epoch:train:2301-2400batch: iter_time=1.059e-04, forward_time=0.146, loss_ctc=70.891, loss_att=51.557, acc=0.706, 
loss=57.357, backward_time=1.028, grad_norm=101.405, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.903e-05, train_time=2.714 +[gpub005:0/64] 2023-07-08 21:22:06,949 (trainer:732) INFO: 28epoch:train:2401-2500batch: iter_time=1.033e-04, forward_time=0.146, loss_ctc=72.815, loss_att=54.854, acc=0.693, loss=60.242, backward_time=1.028, grad_norm=90.955, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.901e-05, train_time=2.713 +[gpub005:0/64] 2023-07-08 21:22:16,616 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub005:0/64] 2023-07-08 21:22:34,575 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 21:22:38,318 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 21:22:38,318 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub005:0/64] 2023-07-08 21:22:38,324 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 21:28:14,931 (trainer:732) INFO: 28epoch:train:2501-2600batch: iter_time=2.263, forward_time=0.151, loss_ctc=73.063, loss_att=56.968, acc=0.690, loss=61.797, backward_time=1.043, grad_norm=109.171, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.900e-05, train_time=7.359 +[gpub005:0/64] 2023-07-08 21:30:31,457 (trainer:732) INFO: 28epoch:train:2601-2700batch: iter_time=1.187e-04, forward_time=0.145, loss_ctc=72.709, loss_att=55.640, acc=0.701, loss=60.761, backward_time=1.030, grad_norm=110.117, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.899e-05, train_time=2.730 +[gpub005:0/64] 2023-07-08 21:32:47,437 (trainer:732) INFO: 28epoch:train:2701-2800batch: iter_time=1.193e-04, forward_time=0.148, loss_ctc=81.634, loss_att=59.340, acc=0.699, loss=66.029, backward_time=1.029, grad_norm=103.358, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.897e-05, train_time=2.719 +[gpub005:0/64] 2023-07-08 21:35:03,491 (trainer:732) INFO: 28epoch:train:2801-2900batch: iter_time=1.088e-04, forward_time=0.147, loss_ctc=80.592, loss_att=60.457, acc=0.699, loss=66.498, backward_time=1.031, grad_norm=117.781, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.896e-05, train_time=2.721 +[gpub005:0/64] 2023-07-08 21:37:19,461 (trainer:732) INFO: 28epoch:train:2901-3000batch: iter_time=1.123e-04, forward_time=0.147, loss_ctc=68.204, loss_att=49.213, acc=0.699, loss=54.910, backward_time=1.029, grad_norm=90.669, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.895e-05, train_time=2.719 +[gpub005:0/64] 2023-07-08 21:39:35,137 (trainer:732) INFO: 28epoch:train:3001-3100batch: iter_time=1.116e-04, forward_time=0.147, loss_ctc=62.732, loss_att=41.881, acc=0.732, loss=48.137, backward_time=1.028, grad_norm=92.578, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.893e-05, train_time=2.713 +[gpub005:0/64] 2023-07-08 21:41:50,956 (trainer:732) 
INFO: 28epoch:train:3101-3200batch: iter_time=1.205e-04, forward_time=0.147, loss_ctc=63.201, loss_att=47.922, acc=0.716, loss=52.506, backward_time=1.028, grad_norm=82.434, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.892e-05, train_time=2.716 +[gpub005:0/64] 2023-07-08 21:44:06,741 (trainer:732) INFO: 28epoch:train:3201-3300batch: iter_time=1.130e-04, forward_time=0.147, loss_ctc=73.855, loss_att=52.520, acc=0.715, loss=58.920, backward_time=1.028, grad_norm=95.321, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.891e-05, train_time=2.715 +[gpub005:0/64] 2023-07-08 21:44:56,339 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub005:0/64] 2023-07-08 21:45:14,546 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-08 21:45:18,307 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-08 21:45:18,308 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub005:0/64] 2023-07-08 21:45:18,314 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-08 21:49:50,797 (trainer:732) INFO: 28epoch:train:3301-3400batch: iter_time=1.410, forward_time=0.147, loss_ctc=67.104, loss_att=52.257, acc=0.697, loss=56.711, backward_time=1.043, grad_norm=105.434, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.182, optim0_lr0=6.889e-05, train_time=6.881 +[gpub005:0/64] 2023-07-08 21:52:07,052 (trainer:732) INFO: 28epoch:train:3401-3500batch: iter_time=1.169e-04, forward_time=0.146, loss_ctc=73.493, loss_att=55.952, acc=0.687, loss=61.214, backward_time=1.028, grad_norm=92.809, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.182, optim0_lr0=6.888e-05, train_time=2.725 +[gpub005:0/64] 2023-07-08 21:54:22,857 (trainer:732) INFO: 28epoch:train:3501-3600batch: iter_time=1.204e-04, forward_time=0.146, loss_ctc=81.704, loss_att=61.798, acc=0.682, loss=67.770, backward_time=1.027, grad_norm=108.375, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.887e-05, train_time=2.716 +[gpub005:0/64] 2023-07-08 21:56:38,614 (trainer:732) INFO: 28epoch:train:3601-3700batch: iter_time=1.192e-04, forward_time=0.145, loss_ctc=77.647, loss_att=60.096, acc=0.700, loss=65.361, backward_time=1.026, grad_norm=106.625, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.886e-05, train_time=2.715 +[gpub005:0/64] 2023-07-08 21:58:54,236 (trainer:732) INFO: 28epoch:train:3701-3800batch: iter_time=1.156e-04, forward_time=0.145, loss_ctc=74.147, loss_att=50.797, acc=0.688, loss=57.802, backward_time=1.026, grad_norm=102.912, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.884e-05, train_time=2.712 +[gpub005:0/64] 2023-07-08 22:01:09,914 (trainer:732) INFO: 28epoch:train:3801-3900batch: iter_time=1.187e-04, forward_time=0.144, loss_ctc=65.999, loss_att=48.726, acc=0.715, loss=53.908, backward_time=1.028, grad_norm=94.109, clip=100.000, 
+[gpub005:0/64] 2023-07-08 22:01:09,914 (trainer:732) INFO: 28epoch:train:3801-3900batch: iter_time=1.187e-04, forward_time=0.144, loss_ctc=65.999, loss_att=48.726, acc=0.715, loss=53.908, backward_time=1.028, grad_norm=94.109, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.182, optim0_lr0=6.883e-05, train_time=2.713
+[gpub005:0/64] 2023-07-08 22:03:25,396 (trainer:732) INFO: 28epoch:train:3901-4000batch: iter_time=1.171e-04, forward_time=0.145, loss_ctc=57.597, loss_att=41.269, acc=0.713, loss=46.168, backward_time=1.026, grad_norm=85.071, clip=100.000, loss_scale=2.418e+24, optim_step_time=0.183, optim0_lr0=6.882e-05, train_time=2.709
+[gpub005:0/64] 2023-07-08 22:05:41,244 (trainer:732) INFO: 28epoch:train:4001-4100batch: iter_time=1.258e-04, forward_time=0.146, loss_ctc=76.578, loss_att=55.734, acc=0.705, loss=61.987, backward_time=1.029, grad_norm=122.779, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.182, optim0_lr0=6.880e-05, train_time=2.717
+[gpub005:0/64] 2023-07-08 22:07:13,468 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub005:0/64] 2023-07-08 22:07:31,740 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 22:07:35,467 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 22:07:35,467 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpub005:0/64] 2023-07-08 22:07:35,473 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 22:11:06,603 (trainer:732) INFO: 28epoch:train:4101-4200batch: iter_time=1.257, forward_time=0.146, loss_ctc=69.702, loss_att=51.949, acc=0.705, loss=57.275, backward_time=1.039, grad_norm=91.533, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.182, optim0_lr0=6.879e-05, train_time=6.507
+[gpub005:0/64] 2023-07-08 22:13:23,067 (trainer:732) INFO: 28epoch:train:4201-4300batch: iter_time=1.210e-04, forward_time=0.146, loss_ctc=72.398, loss_att=55.641, acc=0.682, loss=60.668, backward_time=1.030, grad_norm=93.124, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.878e-05, train_time=2.729
+[gpub005:0/64] 2023-07-08 22:15:38,757 (trainer:732) INFO: 28epoch:train:4301-4400batch: iter_time=1.200e-04, forward_time=0.145, loss_ctc=75.659, loss_att=56.297, acc=0.700, loss=62.106, backward_time=1.027, grad_norm=100.781, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.876e-05, train_time=2.714
+[gpub005:0/64] 2023-07-08 22:17:54,550 (trainer:732) INFO: 28epoch:train:4401-4500batch: iter_time=1.208e-04, forward_time=0.146, loss_ctc=76.934, loss_att=56.013, acc=0.694, loss=62.289, backward_time=1.027, grad_norm=103.762, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.875e-05, train_time=2.716
+[gpub005:0/64] 2023-07-08 22:20:10,415 (trainer:732) INFO: 28epoch:train:4501-4600batch: iter_time=1.173e-04, forward_time=0.146, loss_ctc=82.080, loss_att=60.106, acc=0.689, loss=66.698, backward_time=1.028, grad_norm=118.848, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.874e-05, train_time=2.717
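The loss_scale column doubles from 2.418e+24 to 4.836e+24 at batch 4001-4100 above. This is dynamic loss scaling for mixed-precision training: the scale grows geometrically after a fixed run of overflow-free steps and is cut back when gradients overflow. A sketch with torch.cuda.amp.GradScaler-style defaults (the actual scaler and its hyperparameters are not visible in this log):

    class DynamicScaler:
        """Toy dynamic loss scaler; mirrors GradScaler's update rule, not ESPnet code."""

        def __init__(self, scale=2.0 ** 16, growth_factor=2.0,
                     backoff_factor=0.5, growth_interval=2000):
            self.scale = scale
            self.growth_factor = growth_factor
            self.backoff_factor = backoff_factor
            self.growth_interval = growth_interval
            self._good_steps = 0

        def update(self, found_inf: bool) -> None:
            if found_inf:                        # overflow: shrink, restart the count
                self.scale *= self.backoff_factor
                self._good_steps = 0
            else:
                self._good_steps += 1
                if self._good_steps >= self.growth_interval:
                    self.scale *= self.growth_factor   # e.g. 2.418e+24 -> 4.836e+24
                    self._good_steps = 0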
+[gpub005:0/64] 2023-07-08 22:22:26,037 (trainer:732) INFO: 28epoch:train:4601-4700batch: iter_time=1.057e-04, forward_time=0.146, loss_ctc=66.639, loss_att=48.499, acc=0.718, loss=53.941, backward_time=1.027, grad_norm=90.376, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.873e-05, train_time=2.712
+[gpub005:0/64] 2023-07-08 22:24:41,674 (trainer:732) INFO: 28epoch:train:4701-4800batch: iter_time=1.013e-04, forward_time=0.147, loss_ctc=54.555, loss_att=40.462, acc=0.714, loss=44.690, backward_time=1.027, grad_norm=90.199, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.871e-05, train_time=2.713
+[gpub005:0/64] 2023-07-08 22:26:57,140 (trainer:732) INFO: 28epoch:train:4801-4900batch: iter_time=1.256e-04, forward_time=0.144, loss_ctc=69.488, loss_att=50.570, acc=0.708, loss=56.246, backward_time=1.025, grad_norm=110.521, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.870e-05, train_time=2.709
+[gpub005:0/64] 2023-07-08 22:29:26,869 (trainer:732) INFO: 28epoch:train:4901-5000batch: iter_time=1.214e-04, forward_time=0.173, loss_ctc=72.734, loss_att=55.263, acc=0.695, loss=60.504, backward_time=1.071, grad_norm=108.435, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.182, optim0_lr0=6.869e-05, train_time=2.994
+[gpub005:0/64] 2023-07-08 22:29:31,147 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub005:0/64] 2023-07-08 22:29:49,288 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 22:29:52,760 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 22:29:52,760 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub005:0/64] 2023-07-08 22:29:52,766 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 22:35:12,143 (trainer:732) INFO: 28epoch:train:5001-5100batch: iter_time=1.294, forward_time=0.148, loss_ctc=72.245, loss_att=56.263, acc=0.695, loss=61.058, backward_time=1.050, grad_norm=99.183, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.867e-05, train_time=6.905
+[gpub005:0/64] 2023-07-08 22:37:28,422 (trainer:732) INFO: 28epoch:train:5101-5200batch: iter_time=1.092e-04, forward_time=0.146, loss_ctc=72.485, loss_att=56.428, acc=0.701, loss=61.245, backward_time=1.031, grad_norm=105.729, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.866e-05, train_time=2.725
+[gpub005:0/64] 2023-07-08 22:39:44,453 (trainer:732) INFO: 28epoch:train:5201-5300batch: iter_time=1.167e-04, forward_time=0.146, loss_ctc=82.505, loss_att=60.573, acc=0.698, loss=67.153, backward_time=1.030, grad_norm=117.962, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.865e-05, train_time=2.720
+[gpub005:0/64] 2023-07-08 22:42:00,678 (trainer:732) INFO: 28epoch:train:5301-5400batch: iter_time=1.214e-04, forward_time=0.147, loss_ctc=77.682, loss_att=59.365, acc=0.704, loss=64.860, backward_time=1.031, grad_norm=108.475, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.863e-05, train_time=2.724
+[gpub005:0/64] 2023-07-08 22:44:16,338 (trainer:732) INFO: 28epoch:train:5401-5500batch: iter_time=1.201e-04, forward_time=0.145, loss_ctc=70.316, loss_att=50.612, acc=0.699, loss=56.524, backward_time=1.028, grad_norm=95.879, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.862e-05, train_time=2.713
+[gpub005:0/64] 2023-07-08 22:46:32,135 (trainer:732) INFO: 28epoch:train:5501-5600batch: iter_time=1.223e-04, forward_time=0.146, loss_ctc=59.824, loss_att=40.367, acc=0.741, loss=46.204, backward_time=1.028, grad_norm=86.453, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.861e-05, train_time=2.716
+[gpub005:0/64] 2023-07-08 22:48:48,069 (trainer:732) INFO: 28epoch:train:5601-5700batch: iter_time=1.184e-04, forward_time=0.146, loss_ctc=62.478, loss_att=47.072, acc=0.721, loss=51.693, backward_time=1.028, grad_norm=97.361, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.860e-05, train_time=2.718
+[gpub005:0/64] 2023-07-08 22:51:03,830 (trainer:732) INFO: 28epoch:train:5701-5800batch: iter_time=1.254e-04, forward_time=0.145, loss_ctc=73.749, loss_att=52.299, acc=0.714, loss=58.734, backward_time=1.027, grad_norm=105.786, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.858e-05, train_time=2.715
+[gpub005:0/64] 2023-07-08 22:51:51,144 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub005:0/64] 2023-07-08 22:52:09,396 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 22:52:12,905 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 22:52:12,905 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub005:0/64] 2023-07-08 22:52:12,911 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 22:58:01,898 (trainer:732) INFO: 28epoch:train:5801-5900batch: iter_time=1.311, forward_time=0.146, loss_ctc=72.764, loss_att=56.765, acc=0.696, loss=61.565, backward_time=1.052, grad_norm=112.026, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.857e-05, train_time=8.361
+[gpub005:0/64] 2023-07-08 23:00:18,123 (trainer:732) INFO: 28epoch:train:5901-6000batch: iter_time=1.145e-04, forward_time=0.146, loss_ctc=71.428, loss_att=53.963, acc=0.702, loss=59.203, backward_time=1.029, grad_norm=114.929, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.856e-05, train_time=2.724
+[gpub005:0/64] 2023-07-08 23:02:33,896 (trainer:732) INFO: 28epoch:train:6001-6100batch: iter_time=1.129e-04, forward_time=0.146, loss_ctc=79.868, loss_att=61.221, acc=0.697, loss=66.815, backward_time=1.028, grad_norm=100.067, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.854e-05, train_time=2.715
+[gpub005:0/64] 2023-07-08 23:04:49,978 (trainer:732) INFO: 28epoch:train:6101-6200batch: iter_time=1.071e-04, forward_time=0.146, loss_ctc=78.402, loss_att=59.583, acc=0.701, loss=65.229, backward_time=1.030, grad_norm=112.131, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.853e-05, train_time=2.721
+[gpub005:0/64] 2023-07-08 23:07:05,436 (trainer:732) INFO: 28epoch:train:6201-6300batch: iter_time=1.087e-04, forward_time=0.144, loss_ctc=67.248, loss_att=47.858, acc=0.704, loss=53.675, backward_time=1.026, grad_norm=110.296, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.852e-05, train_time=2.709
+[gpub005:0/64] 2023-07-08 23:09:20,706 (trainer:732) INFO: 28epoch:train:6301-6400batch: iter_time=1.095e-04, forward_time=0.145, loss_ctc=63.660, loss_att=43.054, acc=0.732, loss=49.236, backward_time=1.026, grad_norm=83.660, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.851e-05, train_time=2.705
+[gpub005:0/64] 2023-07-08 23:11:36,441 (trainer:732) INFO: 28epoch:train:6401-6500batch: iter_time=1.108e-04, forward_time=0.146, loss_ctc=60.251, loss_att=43.848, acc=0.724, loss=48.769, backward_time=1.028, grad_norm=91.709, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.849e-05, train_time=2.714
+[gpub005:0/64] 2023-07-08 23:13:52,452 (trainer:732) INFO: 28epoch:train:6501-6600batch: iter_time=1.138e-04, forward_time=0.147, loss_ctc=74.912, loss_att=55.471, acc=0.707, loss=61.303, backward_time=1.030, grad_norm=89.579, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.848e-05, train_time=2.720
+[gpub005:0/64] 2023-07-08 23:15:26,907 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub005:0/64] 2023-07-08 23:15:45,143 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 23:15:48,597 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 23:15:48,597 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub005:0/64] 2023-07-08 23:15:48,603 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 23:20:55,660 (trainer:732) INFO: 28epoch:train:6601-6700batch: iter_time=1.286, forward_time=0.153, loss_ctc=79.725, loss_att=63.645, acc=0.703, loss=68.469, backward_time=1.041, grad_norm=117.042, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.185, optim0_lr0=6.847e-05, train_time=8.464
+[gpub005:0/64] 2023-07-08 23:23:21,217 (trainer:732) INFO: 28epoch:train:6701-6800batch: iter_time=1.228e-04, forward_time=0.164, loss_ctc=72.691, loss_att=55.014, acc=0.698, loss=60.317, backward_time=1.046, grad_norm=109.801, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.845e-05, train_time=2.911
+[gpub005:0/64] 2023-07-08 23:25:38,727 (trainer:732) INFO: 28epoch:train:6801-6900batch: iter_time=1.085e-04, forward_time=0.145, loss_ctc=75.359, loss_att=57.243, acc=0.704, loss=62.678, backward_time=1.029, grad_norm=107.335, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.844e-05, train_time=2.750
+[gpub005:0/64] 2023-07-08 23:27:54,797 (trainer:732) INFO: 28epoch:train:6901-7000batch: iter_time=1.157e-04, forward_time=0.146, loss_ctc=74.894, loss_att=54.136, acc=0.713, loss=60.363, backward_time=1.029, grad_norm=108.816, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.843e-05, train_time=2.721
+[gpub005:0/64] 2023-07-08 23:30:11,239 (trainer:732) INFO: 28epoch:train:7001-7100batch: iter_time=1.201e-04, forward_time=0.145, loss_ctc=81.357, loss_att=58.035, acc=0.695, loss=65.032, backward_time=1.032, grad_norm=149.789, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.842e-05, train_time=2.729
+[gpub005:0/64] 2023-07-08 23:32:40,425 (trainer:732) INFO: 28epoch:train:7101-7200batch: iter_time=1.226e-04, forward_time=0.145, loss_ctc=66.158, loss_att=46.689, acc=0.720, loss=52.530, backward_time=1.049, grad_norm=111.917, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.840e-05, train_time=2.983
+[gpub005:0/64] 2023-07-08 23:34:58,213 (trainer:732) INFO: 28epoch:train:7201-7300batch: iter_time=1.228e-04, forward_time=0.145, loss_ctc=54.101, loss_att=39.666, acc=0.729, loss=43.996, backward_time=1.033, grad_norm=92.941, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.839e-05, train_time=2.756
+[gpub005:0/64] 2023-07-08 23:37:14,151 (trainer:732) INFO: 28epoch:train:7301-7400batch: iter_time=1.118e-04, forward_time=0.146, loss_ctc=68.799, loss_att=50.354, acc=0.721, loss=55.888, backward_time=1.029, grad_norm=109.989, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.838e-05, train_time=2.719
+[gpub005:0/64] 2023-07-08 23:39:31,461 (trainer:732) INFO: 28epoch:train:7401-7500batch: iter_time=1.039e-04, forward_time=0.145, loss_ctc=72.077, loss_att=54.561, acc=0.706, loss=59.816, backward_time=1.030, grad_norm=91.086, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.836e-05, train_time=2.746
+[gpub005:0/64] 2023-07-08 23:39:34,674 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
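optim0_lr0 shrinks by only about 1e-8 per hundred batches here, which matches inverse-square-root decay long after warmup. Assuming the peak rate 2.5e-4 and the 10k warmup steps encoded in the experiment name, ESPnet's WarmupLR schedule would be (a sketch; the optimizer's internal step count is not printed in these lines):

    def warmup_lr(step: int, peak_lr: float = 2.5e-4, warmup: int = 10000) -> float:
        """Linear warmup to peak_lr, then inverse-sqrt decay; requires step >= 1."""
        return peak_lr * warmup ** 0.5 * min(step ** -0.5, step * warmup ** -1.5)

    # warmup_lr(10000) == 2.5e-4 at the end of warmup; afterwards the rate decays
    # as step**-0.5, which is why the logged value creeps down so slowly near 6.8e-05.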
+[gpub005:0/64] 2023-07-08 23:39:53,142 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-08 23:39:56,612 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-08 23:39:56,612 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub005:0/64] 2023-07-08 23:39:56,618 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-08 23:45:44,315 (trainer:732) INFO: 28epoch:train:7501-7600batch: iter_time=1.328, forward_time=0.147, loss_ctc=71.380, loss_att=56.723, acc=0.686, loss=61.120, backward_time=1.041, grad_norm=103.229, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.835e-05, train_time=7.457
+[gpub005:0/64] 2023-07-08 23:48:00,073 (trainer:732) INFO: 28epoch:train:7601-7700batch: iter_time=1.118e-04, forward_time=0.144, loss_ctc=72.398, loss_att=57.071, acc=0.691, loss=61.669, backward_time=1.027, grad_norm=89.683, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.834e-05, train_time=2.715
+[gpub005:0/64] 2023-07-08 23:50:16,768 (trainer:732) INFO: 28epoch:train:7701-7800batch: iter_time=1.307e-04, forward_time=0.146, loss_ctc=78.542, loss_att=58.230, acc=0.695, loss=64.323, backward_time=1.027, grad_norm=116.271, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.833e-05, train_time=2.734
+[gpub005:0/64] 2023-07-08 23:52:32,396 (trainer:732) INFO: 28epoch:train:7801-7900batch: iter_time=1.381e-04, forward_time=0.144, loss_ctc=78.828, loss_att=58.826, acc=0.698, loss=64.827, backward_time=1.027, grad_norm=109.064, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.831e-05, train_time=2.712
+[gpub005:0/64] 2023-07-08 23:54:48,243 (trainer:732) INFO: 28epoch:train:7901-8000batch: iter_time=1.203e-04, forward_time=0.145, loss_ctc=69.930, loss_att=50.336, acc=0.706, loss=56.214, backward_time=1.027, grad_norm=96.055, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.830e-05, train_time=2.717
+[gpub005:0/64] 2023-07-08 23:57:03,582 (trainer:732) INFO: 28epoch:train:8001-8100batch: iter_time=1.285e-04, forward_time=0.145, loss_ctc=59.761, loss_att=41.062, acc=0.728, loss=46.672, backward_time=1.026, grad_norm=94.381, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.829e-05, train_time=2.707
+[gpub005:0/64] 2023-07-08 23:59:19,301 (trainer:732) INFO: 28epoch:train:8101-8200batch: iter_time=1.383e-04, forward_time=0.145, loss_ctc=63.711, loss_att=48.157, acc=0.707, loss=52.823, backward_time=1.029, grad_norm=95.737, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.828e-05, train_time=2.714
+[gpub005:0/64] 2023-07-09 00:01:34,627 (trainer:732) INFO: 28epoch:train:8201-8300batch: iter_time=1.262e-04, forward_time=0.144, loss_ctc=74.016, loss_att=52.648, acc=0.706, loss=59.058, backward_time=1.025, grad_norm=98.099, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.826e-05, train_time=2.706
+[gpub005:0/64] 2023-07-09 00:02:20,574 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpub005:0/64] 2023-07-09 00:02:39,116 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-09 00:02:42,968 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-09 00:02:42,969 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub005:0/64] 2023-07-09 00:02:42,975 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-09 00:07:17,507 (trainer:732) INFO: 28epoch:train:8301-8400batch: iter_time=1.279, forward_time=0.147, loss_ctc=67.473, loss_att=52.063, acc=0.704, loss=56.686, backward_time=1.045, grad_norm=110.832, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.825e-05, train_time=6.857
+[gpub005:0/64] 2023-07-09 00:09:34,819 (trainer:732) INFO: 28epoch:train:8401-8500batch: iter_time=1.026e-04, forward_time=0.143, loss_ctc=73.300, loss_att=54.385, acc=0.702, loss=60.059, backward_time=1.027, grad_norm=113.571, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.824e-05, train_time=2.746
+[gpub005:0/64] 2023-07-09 00:11:51,672 (trainer:732) INFO: 28epoch:train:8501-8600batch: iter_time=1.048e-04, forward_time=0.146, loss_ctc=79.277, loss_att=60.008, acc=0.702, loss=65.788, backward_time=1.029, grad_norm=134.268, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.822e-05, train_time=2.737
+[gpub005:0/64] 2023-07-09 00:14:08,387 (trainer:732) INFO: 28epoch:train:8601-8700batch: iter_time=1.041e-04, forward_time=0.144, loss_ctc=79.016, loss_att=61.352, acc=0.710, loss=66.652, backward_time=1.029, grad_norm=114.222, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.821e-05, train_time=2.734
+[gpub005:0/64] 2023-07-09 00:16:24,162 (trainer:732) INFO: 28epoch:train:8701-8800batch: iter_time=1.025e-04, forward_time=0.145, loss_ctc=71.708, loss_att=50.081, acc=0.692, loss=56.569, backward_time=1.027, grad_norm=119.421, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.820e-05, train_time=2.715
+[gpub005:0/64] 2023-07-09 00:18:39,832 (trainer:732) INFO: 28epoch:train:8801-8900batch: iter_time=1.068e-04, forward_time=0.145, loss_ctc=63.647, loss_att=45.869, acc=0.730, loss=51.202, backward_time=1.027, grad_norm=81.853, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.819e-05, train_time=2.713
+[gpub005:0/64] 2023-07-09 00:21:09,960 (trainer:732) INFO: 28epoch:train:8901-9000batch: iter_time=1.066e-04, forward_time=0.145, loss_ctc=55.394, loss_att=39.466, acc=0.733, loss=44.244, backward_time=1.066, grad_norm=83.283, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.817e-05, train_time=3.002
+[gpub005:0/64] 2023-07-09 00:23:27,121 (trainer:732) INFO: 28epoch:train:9001-9100batch: iter_time=1.032e-04, forward_time=0.146, loss_ctc=77.192, loss_att=57.711, acc=0.712, loss=63.555, backward_time=1.030, grad_norm=122.747, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.816e-05, train_time=2.743
+[gpub005:0/64] 2023-07-09 00:24:58,099 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpub005:0/64] 2023-07-09 00:25:16,535 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-09 00:25:19,994 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-09 00:25:19,994 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub005:0/64] 2023-07-09 00:25:20,000 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-09 00:30:05,437 (trainer:732) INFO: 28epoch:train:9101-9200batch: iter_time=1.262, forward_time=0.153, loss_ctc=66.052, loss_att=48.621, acc=0.718, loss=53.850, backward_time=1.040, grad_norm=88.945, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.815e-05, train_time=7.966
+[gpub005:0/64] 2023-07-09 00:32:27,808 (trainer:732) INFO: 28epoch:train:9201-9300batch: iter_time=1.159e-04, forward_time=0.145, loss_ctc=71.873, loss_att=53.639, acc=0.689, loss=59.110, backward_time=1.041, grad_norm=96.280, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.814e-05, train_time=2.847
+[gpub005:0/64] 2023-07-09 00:34:45,342 (trainer:732) INFO: 28epoch:train:9301-9400batch: iter_time=1.180e-04, forward_time=0.145, loss_ctc=80.187, loss_att=62.167, acc=0.681, loss=67.573, backward_time=1.028, grad_norm=122.430, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.812e-05, train_time=2.750
+[gpub005:0/64] 2023-07-09 00:37:02,014 (trainer:732) INFO: 28epoch:train:9401-9500batch: iter_time=1.190e-04, forward_time=0.146, loss_ctc=78.718, loss_att=59.859, acc=0.703, loss=65.517, backward_time=1.029, grad_norm=119.060, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.811e-05, train_time=2.733
+[gpub005:0/64] 2023-07-09 00:39:18,308 (trainer:732) INFO: 28epoch:train:9501-9600batch: iter_time=1.152e-04, forward_time=0.146, loss_ctc=72.327, loss_att=48.677, acc=0.700, loss=55.772, backward_time=1.028, grad_norm=106.106, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.810e-05, train_time=2.726
+[gpub005:0/64] 2023-07-09 00:41:34,013 (trainer:732) INFO: 28epoch:train:9601-9700batch: iter_time=1.140e-04, forward_time=0.144, loss_ctc=64.789, loss_att=49.014, acc=0.714, loss=53.746, backward_time=1.026, grad_norm=113.313, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.182, optim0_lr0=6.809e-05, train_time=2.714
+[gpub005:0/64] 2023-07-09 00:43:49,399 (trainer:732) INFO: 28epoch:train:9701-9800batch: iter_time=1.158e-04, forward_time=0.145, loss_ctc=57.198, loss_att=41.033, acc=0.717, loss=45.882, backward_time=1.025, grad_norm=93.064, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.807e-05, train_time=2.707
+[gpub005:0/64] 2023-07-09 00:46:05,002 (trainer:732) INFO: 28epoch:train:9801-9900batch: iter_time=1.168e-04, forward_time=0.144, loss_ctc=73.809, loss_att=54.241, acc=0.702, loss=60.111, backward_time=1.027, grad_norm=105.246, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.806e-05, train_time=2.712
+[gpub005:0/64] 2023-07-09 00:48:20,905 (trainer:732) INFO: 28epoch:train:9901-10000batch: iter_time=1.288e-04, forward_time=0.145, loss_ctc=79.070, loss_att=61.377, acc=0.697, loss=66.685, backward_time=1.029, grad_norm=105.646, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.805e-05, train_time=2.718
+[gpub005:0/64] 2023-07-09 01:00:59,859 (trainer:338) INFO: 28epoch results: [train] iter_time=0.181, forward_time=0.147, loss_ctc=71.863, loss_att=53.365, acc=0.703, loss=58.915, backward_time=1.032, grad_norm=105.211, clip=100.000, loss_scale=4.836e+24, optim_step_time=0.183, optim0_lr0=6.868e-05, train_time=3.295, time=4 hours, 34 minutes and 54.57 seconds, total_count=250000, gpu_max_cached_mem_GB=38.234, [valid] loss_ctc=47.311, cer_ctc=0.272, loss_att=40.448, acc=0.654, cer=0.441, wer=1.000, loss=42.507, time=6 minutes and 28.69 seconds, total_count=25806, gpu_max_cached_mem_GB=38.234, [att_plot] time=5 minutes and 49.92 seconds, total_count=0, gpu_max_cached_mem_GB=38.234
+[gpub005:0/64] 2023-07-09 01:01:17,688 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub005:0/64] 2023-07-09 01:01:17,806 (trainer:272) INFO: 29/30epoch started. Estimated time to finish: 9 hours, 37 minutes and 17.41 seconds
+[gpub005:0/64] 2023-07-09 01:01:18,821 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub005:0/64] 2023-07-09 01:01:37,314 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-09 01:01:42,700 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-09 01:01:42,700 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpub005:0/64] 2023-07-09 01:01:42,754 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-09 01:08:33,856 (trainer:732) INFO: 29epoch:train:1-100batch: iter_time=2.923, forward_time=0.172, loss_ctc=75.895, loss_att=61.514, acc=0.699, loss=65.828, backward_time=1.046, grad_norm=122.053, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.192, optim0_lr0=6.803e-05, train_time=8.710
+[gpub005:0/64] 2023-07-09 01:10:50,462 (trainer:732) INFO: 29epoch:train:101-200batch: iter_time=1.038e-04, forward_time=0.144, loss_ctc=68.461, loss_att=53.461, acc=0.697, loss=57.961, backward_time=1.028, grad_norm=104.908, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.802e-05, train_time=2.732
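Two details of the epoch boundary above are worth unpacking. "The best model has been updated: valid.total_count" uses a criterion that increases monotonically, so that particular line effectively marks the newest checkpoint rather than a quality improvement. And the "Estimated time to finish" is consistent with simply scaling the measured per-epoch wall time by the two remaining epochs:

    from datetime import timedelta

    # Wall-clock figures taken from the "28epoch results" line above.
    per_epoch = (timedelta(hours=4, minutes=34, seconds=54.57)   # [train]
                 + timedelta(minutes=6, seconds=28.69)           # [valid]
                 + timedelta(minutes=5, seconds=49.92))          # [att_plot]
    print(2 * per_epoch)  # 9:34:26.36 for epochs 29-30, near the logged 9:37:17.41
    # (the small gap plausibly covers checkpoint writing and logging overhead)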
+[gpub005:0/64] 2023-07-09 01:13:08,278 (trainer:732) INFO: 29epoch:train:201-300batch: iter_time=1.035e-04, forward_time=0.145, loss_ctc=90.315, loss_att=55.873, acc=0.705, loss=66.206, backward_time=1.031, grad_norm=113.891, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.801e-05, train_time=2.756
+[gpub005:0/64] 2023-07-09 01:15:33,011 (trainer:732) INFO: 29epoch:train:301-400batch: iter_time=1.055e-04, forward_time=0.145, loss_ctc=73.221, loss_att=50.232, acc=0.710, loss=57.129, backward_time=1.036, grad_norm=122.026, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.800e-05, train_time=2.894
+[gpub005:0/64] 2023-07-09 01:17:54,852 (trainer:732) INFO: 29epoch:train:401-500batch: iter_time=9.923e-05, forward_time=0.145, loss_ctc=73.934, loss_att=56.527, acc=0.698, loss=61.749, backward_time=1.033, grad_norm=117.019, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.798e-05, train_time=2.837
+[gpub005:0/64] 2023-07-09 01:20:26,236 (trainer:732) INFO: 29epoch:train:501-600batch: iter_time=9.637e-05, forward_time=0.145, loss_ctc=70.822, loss_att=54.513, acc=0.707, loss=59.406, backward_time=1.075, grad_norm=98.241, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.797e-05, train_time=3.027
+[gpub005:0/64] 2023-07-09 01:22:58,108 (trainer:732) INFO: 29epoch:train:601-700batch: iter_time=9.952e-05, forward_time=0.145, loss_ctc=70.652, loss_att=57.641, acc=0.680, loss=61.544, backward_time=1.053, grad_norm=96.985, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.796e-05, train_time=3.037
+[gpub005:0/64] 2023-07-09 01:25:17,769 (trainer:732) INFO: 29epoch:train:701-800batch: iter_time=1.138e-04, forward_time=0.144, loss_ctc=60.087, loss_att=42.384, acc=0.718, loss=47.695, backward_time=1.040, grad_norm=90.361, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.795e-05, train_time=2.793
+[gpub005:0/64] 2023-07-09 01:26:10,853 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpub005:0/64] 2023-07-09 01:26:28,595 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-09 01:26:32,230 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-09 01:26:32,230 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub005:0/64] 2023-07-09 01:26:32,236 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-09 01:31:54,364 (trainer:732) INFO: 29epoch:train:801-900batch: iter_time=1.384, forward_time=0.149, loss_ctc=78.227, loss_att=62.233, acc=0.694, loss=67.031, backward_time=1.042, grad_norm=108.631, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.793e-05, train_time=7.932
+[gpub005:0/64] 2023-07-09 01:34:11,688 (trainer:732) INFO: 29epoch:train:901-1000batch: iter_time=1.125e-04, forward_time=0.144, loss_ctc=63.851, loss_att=49.602, acc=0.700, loss=53.877, backward_time=1.029, grad_norm=110.254, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.792e-05, train_time=2.746
+[gpub005:0/64] 2023-07-09 01:36:27,600 (trainer:732) INFO: 29epoch:train:1001-1100batch: iter_time=1.100e-04, forward_time=0.145, loss_ctc=78.049, loss_att=56.277, acc=0.710, loss=62.809, backward_time=1.029, grad_norm=103.467, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.791e-05, train_time=2.718
+[gpub005:0/64] 2023-07-09 01:38:43,893 (trainer:732) INFO: 29epoch:train:1101-1200batch: iter_time=1.122e-04, forward_time=0.145, loss_ctc=80.017, loss_att=48.167, acc=0.710, loss=57.722, backward_time=1.029, grad_norm=140.589, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.790e-05, train_time=2.726
+[gpub005:0/64] 2023-07-09 01:41:00,282 (trainer:732) INFO: 29epoch:train:1201-1300batch: iter_time=1.151e-04, forward_time=0.146, loss_ctc=76.299, loss_att=60.020, acc=0.697, loss=64.904, backward_time=1.032, grad_norm=116.404, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.788e-05, train_time=2.728
+[gpub005:0/64] 2023-07-09 01:43:16,385 (trainer:732) INFO: 29epoch:train:1301-1400batch: iter_time=1.132e-04, forward_time=0.146, loss_ctc=73.892, loss_att=56.992, acc=0.705, loss=62.062, backward_time=1.031, grad_norm=120.717, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.787e-05, train_time=2.722
+[gpub005:0/64] 2023-07-09 01:45:32,444 (trainer:732) INFO: 29epoch:train:1401-1500batch: iter_time=1.029e-04, forward_time=0.146, loss_ctc=70.577, loss_att=55.685, acc=0.690, loss=60.153, backward_time=1.030, grad_norm=103.511, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.786e-05, train_time=2.721
+[gpub005:0/64] 2023-07-09 01:47:47,996 (trainer:732) INFO: 29epoch:train:1501-1600batch: iter_time=1.223e-04, forward_time=0.146, loss_ctc=63.930, loss_att=50.799, acc=0.697, loss=54.738, backward_time=1.026, grad_norm=103.992, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.785e-05, train_time=2.711
+[gpub005:0/64] 2023-07-09 01:49:20,766 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
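Each iter-factory pairs one speech shard (kaldi_ark) with three parallel text streams: text_prev (previous-segment context for the prompt), text_ctc (the CTC branch target), and text (the decoder target). To inspect a shard outside the trainer, the scp split can be read with kaldiio (assumed installed; the path is the one in the dataset block below):

    import kaldiio

    # Iterate one speech shard; each entry is an (utterance-id, data) pair
    # stored in Kaldi ark format. Depending on how the audio was dumped,
    # data is either a feature array or a (sample_rate, waveform) tuple.
    with kaldiio.ReadHelper(
            "scp:exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9") as reader:
        for utt_id, data in reader:
            print(utt_id, type(data))
            break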
+[gpub005:0/64] 2023-07-09 01:49:38,971 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-09 01:49:42,688 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-09 01:49:42,688 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpub005:0/64] 2023-07-09 01:49:42,694 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-09 01:54:36,572 (trainer:732) INFO: 29epoch:train:1601-1700batch: iter_time=1.342, forward_time=0.168, loss_ctc=63.970, loss_att=46.468, acc=0.710, loss=51.719, backward_time=1.035, grad_norm=113.327, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.184, optim0_lr0=6.783e-05, train_time=8.171
+[gpub005:0/64] 2023-07-09 01:56:56,586 (trainer:732) INFO: 29epoch:train:1701-1800batch: iter_time=1.246e-04, forward_time=0.146, loss_ctc=64.314, loss_att=50.068, acc=0.699, loss=54.342, backward_time=1.035, grad_norm=153.292, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.782e-05, train_time=2.800
+[gpub005:0/64] 2023-07-09 01:59:12,539 (trainer:732) INFO: 29epoch:train:1801-1900batch: iter_time=1.046e-04, forward_time=0.147, loss_ctc=74.996, loss_att=54.133, acc=0.713, loss=60.392, backward_time=1.029, grad_norm=101.452, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.781e-05, train_time=2.719
+[gpub005:0/64] 2023-07-09 02:01:28,732 (trainer:732) INFO: 29epoch:train:1901-2000batch: iter_time=1.087e-04, forward_time=0.147, loss_ctc=79.906, loss_att=47.196, acc=0.715, loss=57.009, backward_time=1.030, grad_norm=115.619, clip=100.000, loss_scale=9.671e+24, optim_step_time=0.183, optim0_lr0=6.780e-05, train_time=2.724
+[gpub005:0/64] 2023-07-09 02:03:45,806 (trainer:732) INFO: 29epoch:train:2001-2100batch: iter_time=1.086e-04, forward_time=0.148, loss_ctc=73.845, loss_att=58.312, acc=0.700, loss=62.972, backward_time=1.032, grad_norm=112.680, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.778e-05, train_time=2.741
+[gpub005:0/64] 2023-07-09 02:06:01,896 (trainer:732) INFO: 29epoch:train:2101-2200batch: iter_time=1.073e-04, forward_time=0.147, loss_ctc=72.628, loss_att=54.739, acc=0.701, loss=60.106, backward_time=1.030, grad_norm=99.890, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.777e-05, train_time=2.722
+[gpub005:0/64] 2023-07-09 02:08:17,780 (trainer:732) INFO: 29epoch:train:2201-2300batch: iter_time=1.062e-04, forward_time=0.146, loss_ctc=69.153, loss_att=52.877, acc=0.697, loss=57.760, backward_time=1.029, grad_norm=124.253, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.776e-05, train_time=2.717
+[gpub005:0/64] 2023-07-09 02:10:33,671 (trainer:732) INFO: 29epoch:train:2301-2400batch: iter_time=1.090e-04, forward_time=0.146, loss_ctc=64.791, loss_att=53.066, acc=0.698, loss=56.583, backward_time=1.029, grad_norm=124.055, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.775e-05, train_time=2.718
+[gpub005:0/64] 2023-07-09 02:12:49,411 (trainer:732) INFO: 29epoch:train:2401-2500batch: iter_time=1.146e-04, forward_time=0.145, loss_ctc=67.622, loss_att=55.140, acc=0.702, loss=58.884, backward_time=1.028, grad_norm=88.955, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.182, optim0_lr0=6.773e-05, train_time=2.715
+[gpub005:0/64] 2023-07-09 02:12:54,375 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub005:0/64] 2023-07-09 02:13:12,234 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-09 02:13:15,958 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-09 02:13:15,958 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpub005:0/64] 2023-07-09 02:13:15,964 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-09 02:19:10,372 (trainer:732) INFO: 29epoch:train:2501-2600batch: iter_time=2.404, forward_time=0.165, loss_ctc=66.935, loss_att=50.456, acc=0.706, loss=55.400, backward_time=1.037, grad_norm=106.775, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.185, optim0_lr0=6.772e-05, train_time=7.619
+[gpub005:0/64] 2023-07-09 02:21:26,766 (trainer:732) INFO: 29epoch:train:2601-2700batch: iter_time=1.113e-04, forward_time=0.146, loss_ctc=68.042, loss_att=50.061, acc=0.715, loss=55.456, backward_time=1.030, grad_norm=115.964, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.771e-05, train_time=2.728
+[gpub005:0/64] 2023-07-09 02:23:45,668 (trainer:732) INFO: 29epoch:train:2701-2800batch: iter_time=1.113e-04, forward_time=0.144, loss_ctc=87.303, loss_att=56.217, acc=0.712, loss=65.543, backward_time=1.029, grad_norm=147.226, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.770e-05, train_time=2.778
+[gpub005:0/64] 2023-07-09 02:26:01,790 (trainer:732) INFO: 29epoch:train:2801-2900batch: iter_time=1.210e-04, forward_time=0.146, loss_ctc=68.342, loss_att=46.773, acc=0.712, loss=53.244, backward_time=1.029, grad_norm=107.727, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.768e-05, train_time=2.722
+[gpub005:0/64] 2023-07-09 02:28:17,591 (trainer:732) INFO: 29epoch:train:2901-3000batch: iter_time=1.213e-04, forward_time=0.145, loss_ctc=70.546, loss_att=55.696, acc=0.702, loss=60.151, backward_time=1.028, grad_norm=97.665, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.767e-05, train_time=2.716
+[gpub005:0/64] 2023-07-09 02:30:33,597 (trainer:732) INFO: 29epoch:train:3001-3100batch: iter_time=1.164e-04, forward_time=0.145, loss_ctc=73.929, loss_att=58.503, acc=0.704, loss=63.131, backward_time=1.029, grad_norm=99.412, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.766e-05, train_time=2.720
+[gpub005:0/64] 2023-07-09 02:32:49,257 (trainer:732) INFO: 29epoch:train:3101-3200batch: iter_time=1.203e-04, forward_time=0.146, loss_ctc=64.078, loss_att=51.000, acc=0.697, loss=54.924, backward_time=1.026, grad_norm=104.136, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.765e-05, train_time=2.713
+[gpub005:0/64] 2023-07-09 02:35:05,104 (trainer:732) INFO: 29epoch:train:3201-3300batch: iter_time=1.141e-04, forward_time=0.145, loss_ctc=64.099, loss_att=47.265, acc=0.709, loss=52.315, backward_time=1.029, grad_norm=97.522, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.764e-05, train_time=2.717
+[gpub005:0/64] 2023-07-09 02:35:54,763 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub005:0/64] 2023-07-09 02:36:12,774 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-09 02:36:16,518 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-09 02:36:16,518 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpub005:0/64] 2023-07-09 02:36:16,524 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-09 02:40:55,557 (trainer:732) INFO: 29epoch:train:3301-3400batch: iter_time=1.419, forward_time=0.149, loss_ctc=81.136, loss_att=66.941, acc=0.694, loss=71.200, backward_time=1.068, grad_norm=119.406, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.762e-05, train_time=7.009
+[gpub005:0/64] 2023-07-09 02:43:16,133 (trainer:732) INFO: 29epoch:train:3401-3500batch: iter_time=1.141e-04, forward_time=0.145, loss_ctc=63.487, loss_att=49.185, acc=0.710, loss=53.476, backward_time=1.034, grad_norm=98.479, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.761e-05, train_time=2.811
+[gpub005:0/64] 2023-07-09 02:45:32,121 (trainer:732) INFO: 29epoch:train:3501-3600batch: iter_time=1.180e-04, forward_time=0.145, loss_ctc=77.025, loss_att=55.124, acc=0.715, loss=61.694, backward_time=1.028, grad_norm=114.670, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.760e-05, train_time=2.720
+[gpub005:0/64] 2023-07-09 02:47:47,521 (trainer:732) INFO: 29epoch:train:3601-3700batch: iter_time=1.214e-04, forward_time=0.145, loss_ctc=76.344, loss_att=46.095, acc=0.716, loss=55.170, backward_time=1.025, grad_norm=108.088, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.759e-05, train_time=2.708
+[gpub005:0/64] 2023-07-09 02:50:03,361 (trainer:732) INFO: 29epoch:train:3701-3800batch: iter_time=1.249e-04, forward_time=0.146, loss_ctc=74.359, loss_att=57.845, acc=0.703, loss=62.799, backward_time=1.027, grad_norm=113.639, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.757e-05, train_time=2.717
+[gpub005:0/64] 2023-07-09 02:52:18,965 (trainer:732) INFO: 29epoch:train:3801-3900batch: iter_time=1.165e-04, forward_time=0.145, loss_ctc=74.601, loss_att=55.697, acc=0.706, loss=61.368, backward_time=1.027, grad_norm=110.806, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.756e-05, train_time=2.712
+[gpub005:0/64] 2023-07-09 02:54:49,718 (trainer:732) INFO: 29epoch:train:3901-4000batch: iter_time=1.248e-04, forward_time=0.147, loss_ctc=69.897, loss_att=54.034, acc=0.698, loss=58.793, backward_time=1.058, grad_norm=103.506, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.755e-05, train_time=3.015
+[gpub005:0/64] 2023-07-09 02:57:08,751 (trainer:732) INFO: 29epoch:train:4001-4100batch: iter_time=1.225e-04, forward_time=0.146, loss_ctc=63.725, loss_att=50.310, acc=0.701, loss=54.335, backward_time=1.034, grad_norm=87.206, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.754e-05, train_time=2.780
+[gpub005:0/64] 2023-07-09 02:58:49,008 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub005:0/64] 2023-07-09 02:59:07,100 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-09 02:59:10,536 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-09 02:59:10,536 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub005:0/64] 2023-07-09 02:59:10,542 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-09 03:03:56,521 (trainer:732) INFO: 29epoch:train:4101-4200batch: iter_time=1.354, forward_time=0.145, loss_ctc=72.105, loss_att=56.202, acc=0.708, loss=60.973, backward_time=1.040, grad_norm=111.622, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.752e-05, train_time=8.155
+[gpub005:0/64] 2023-07-09 03:06:13,202 (trainer:732) INFO: 29epoch:train:4201-4300batch: iter_time=1.213e-04, forward_time=0.145, loss_ctc=69.002, loss_att=53.608, acc=0.690, loss=58.226, backward_time=1.028, grad_norm=99.107, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.751e-05, train_time=2.733
+[gpub005:0/64] 2023-07-09 03:08:28,913 (trainer:732) INFO: 29epoch:train:4301-4400batch: iter_time=1.178e-04, forward_time=0.145, loss_ctc=68.222, loss_att=49.890, acc=0.715, loss=55.390, backward_time=1.027, grad_norm=99.071, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.750e-05, train_time=2.714
+[gpub005:0/64] 2023-07-09 03:10:44,538 (trainer:732) INFO: 29epoch:train:4401-4500batch: iter_time=1.173e-04, forward_time=0.145, loss_ctc=84.780, loss_att=56.108, acc=0.707, loss=64.710, backward_time=1.027, grad_norm=107.348, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.749e-05, train_time=2.712
+[gpub005:0/64] 2023-07-09 03:13:02,169 (trainer:732) INFO: 29epoch:train:4501-4600batch: iter_time=1.167e-04, forward_time=0.145, loss_ctc=69.759, loss_att=47.626, acc=0.702, loss=54.266, backward_time=1.029, grad_norm=98.923, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.747e-05, train_time=2.752
+[gpub005:0/64] 2023-07-09 03:15:18,719 (trainer:732) INFO: 29epoch:train:4601-4700batch: iter_time=1.185e-04, forward_time=0.145, loss_ctc=70.803, loss_att=56.571, acc=0.695, loss=60.841, backward_time=1.026, grad_norm=104.781, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.746e-05, train_time=2.731
+[gpub005:0/64] 2023-07-09 03:17:34,404 (trainer:732) INFO: 29epoch:train:4701-4800batch: iter_time=1.086e-04, forward_time=0.145, loss_ctc=72.078, loss_att=57.762, acc=0.699, loss=62.057, backward_time=1.027, grad_norm=104.257, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.745e-05, train_time=2.713
+[gpub005:0/64] 2023-07-09 03:19:52,007 (trainer:732) INFO: 29epoch:train:4801-4900batch: iter_time=1.130e-04, forward_time=0.146, loss_ctc=64.906, loss_att=51.504, acc=0.697, loss=55.525, backward_time=1.030, grad_norm=93.447, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.744e-05, train_time=2.752
+[gpub005:0/64] 2023-07-09 03:22:08,209 (trainer:732) INFO: 29epoch:train:4901-5000batch: iter_time=1.110e-04, forward_time=0.145, loss_ctc=63.847, loss_att=49.021, acc=0.700, loss=53.469, backward_time=1.026, grad_norm=89.197, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.743e-05, train_time=2.724
+[gpub005:0/64] 2023-07-09 03:22:16,127 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub005:0/64] 2023-07-09 03:22:34,650 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-09 03:22:38,116 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-09 03:22:38,116 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpub005:0/64] 2023-07-09 03:22:38,123 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-09 03:29:32,011 (trainer:732) INFO: 29epoch:train:5001-5100batch: iter_time=1.438, forward_time=0.167, loss_ctc=66.539, loss_att=51.321, acc=0.694, loss=55.886, backward_time=1.037, grad_norm=101.883, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.184, optim0_lr0=6.741e-05, train_time=8.875
+[gpub005:0/64] 2023-07-09 03:31:48,082 (trainer:732) INFO: 29epoch:train:5101-5200batch: iter_time=1.251e-04, forward_time=0.145, loss_ctc=67.465, loss_att=49.395, acc=0.710, loss=54.816, backward_time=1.027, grad_norm=99.778, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.740e-05, train_time=2.722
+[gpub005:0/64] 2023-07-09 03:34:04,172 (trainer:732) INFO: 29epoch:train:5201-5300batch: iter_time=1.185e-04, forward_time=0.145, loss_ctc=86.270, loss_att=55.902, acc=0.709, loss=65.012, backward_time=1.026, grad_norm=130.441, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.739e-05, train_time=2.722
+[gpub005:0/64] 2023-07-09 03:36:19,829 (trainer:732) INFO: 29epoch:train:5301-5400batch: iter_time=1.055e-04, forward_time=0.147, loss_ctc=69.380, loss_att=47.502, acc=0.705, loss=54.065, backward_time=1.026, grad_norm=129.518, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.738e-05, train_time=2.713
+[gpub005:0/64] 2023-07-09 03:38:36,943 (trainer:732) INFO: 29epoch:train:5401-5500batch: iter_time=1.046e-04, forward_time=0.148, loss_ctc=69.082, loss_att=55.354, acc=0.697, loss=59.473, backward_time=1.029, grad_norm=113.486, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.736e-05, train_time=2.742
+[gpub005:0/64] 2023-07-09 03:40:53,445 (trainer:732) INFO: 29epoch:train:5501-5600batch: iter_time=1.094e-04, forward_time=0.147, loss_ctc=72.582, loss_att=57.397, acc=0.696, loss=61.952, backward_time=1.029, grad_norm=101.675, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.735e-05, train_time=2.730
+[gpub005:0/64] 2023-07-09 03:43:09,072 (trainer:732) INFO: 29epoch:train:5601-5700batch: iter_time=1.116e-04, forward_time=0.146, loss_ctc=64.110, loss_att=50.796, acc=0.696, loss=54.790, backward_time=1.026, grad_norm=95.741, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.734e-05, train_time=2.712
+[gpub005:0/64] 2023-07-09 03:45:25,401 (trainer:732) INFO: 29epoch:train:5701-5800batch: iter_time=1.137e-04, forward_time=0.145, loss_ctc=63.508, loss_att=48.524, acc=0.705, loss=53.020, backward_time=1.026, grad_norm=92.218, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.733e-05, train_time=2.726
+[gpub005:0/64] 2023-07-09 03:46:27,381 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub005:0/64] 2023-07-09 03:46:45,495 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-09 03:46:49,010 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-09 03:46:49,010 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub005:0/64] 2023-07-09 03:46:49,016 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-09 03:53:18,837 (trainer:732) INFO: 29epoch:train:5801-5900batch: iter_time=2.783, forward_time=0.147, loss_ctc=79.436, loss_att=67.417, acc=0.689, loss=71.022, backward_time=1.047, grad_norm=117.828, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.732e-05, train_time=9.468
+[gpub005:0/64] 2023-07-09 03:55:34,614 (trainer:732) INFO: 29epoch:train:5901-6000batch: iter_time=1.240e-04, forward_time=0.145, loss_ctc=62.841, loss_att=48.708, acc=0.705, loss=52.948, backward_time=1.026, grad_norm=98.596, clip=100.000, loss_scale=1.934e+25, optim_step_time=0.183, optim0_lr0=6.730e-05, train_time=2.715
+[gpub005:0/64] 2023-07-09 03:57:50,427 (trainer:732) INFO: 29epoch:train:6001-6100batch: iter_time=1.114e-04, forward_time=0.145, loss_ctc=76.795, loss_att=55.754, acc=0.710, loss=62.066, backward_time=1.024, grad_norm=125.132, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.729e-05, train_time=2.716
+[gpub005:0/64] 2023-07-09 04:00:06,297 (trainer:732) INFO: 29epoch:train:6101-6200batch: iter_time=1.130e-04, forward_time=0.146, loss_ctc=76.623, loss_att=46.076, acc=0.713, loss=55.240, backward_time=1.026, grad_norm=135.922, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.728e-05, train_time=2.717
+[gpub005:0/64] 2023-07-09 04:02:23,184 (trainer:732) INFO: 29epoch:train:6201-6300batch: iter_time=1.152e-04, forward_time=0.146, loss_ctc=73.911, loss_att=58.019, acc=0.695, loss=62.787, backward_time=1.028, grad_norm=106.548, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.727e-05, train_time=2.738
+[gpub005:0/64] 2023-07-09 04:04:41,945 (trainer:732) INFO: 29epoch:train:6301-6400batch: iter_time=1.209e-04, forward_time=0.146, loss_ctc=73.690, loss_att=55.703, acc=0.702, loss=61.099, backward_time=1.028, grad_norm=92.974, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.725e-05, train_time=2.775
+[gpub005:0/64] 2023-07-09 04:07:02,114 (trainer:732) INFO: 29epoch:train:6401-6500batch: iter_time=1.200e-04, forward_time=0.145, loss_ctc=69.683, loss_att=53.819, acc=0.693, loss=58.578, backward_time=1.030, grad_norm=107.754, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.724e-05, train_time=2.803
+[gpub005:0/64] 2023-07-09 04:09:24,271 (trainer:732) INFO: 29epoch:train:6501-6600batch: iter_time=1.200e-04, forward_time=0.146, loss_ctc=62.912, loss_att=49.656, acc=0.700, loss=53.633, backward_time=1.051, grad_norm=93.183, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.723e-05, train_time=2.843
+[gpub005:0/64] 2023-07-09 04:10:56,904 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub005:0/64] 2023-07-09 04:11:14,983 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub005:0/64] 2023-07-09 04:11:18,376 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+  preprocess: )
+[gpub005:0/64] 2023-07-09 04:11:18,376 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpub005:0/64] 2023-07-09 04:11:18,383 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub005:0/64] 2023-07-09 04:15:17,356 (trainer:732) INFO: 29epoch:train:6601-6700batch: iter_time=1.432, forward_time=0.150, loss_ctc=71.020, loss_att=55.879, acc=0.709, loss=60.421, backward_time=1.057, grad_norm=109.386, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.722e-05, train_time=7.061
+[gpub005:0/64] 2023-07-09 04:17:34,269 (trainer:732) INFO: 29epoch:train:6701-6800batch: iter_time=1.170e-04, forward_time=0.147, loss_ctc=67.619, loss_att=52.545, acc=0.704, loss=57.067, backward_time=1.032, grad_norm=94.824, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.721e-05, train_time=2.738
+[gpub005:0/64] 2023-07-09 04:19:50,475 (trainer:732) INFO: 29epoch:train:6801-6900batch: iter_time=1.284e-04, forward_time=0.146, loss_ctc=67.948, loss_att=51.387, acc=0.714, loss=56.355, backward_time=1.030, grad_norm=96.289, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.719e-05, train_time=2.724
+[gpub005:0/64] 2023-07-09 04:22:06,494 (trainer:732) INFO: 29epoch:train:6901-7000batch: iter_time=1.305e-04, forward_time=0.147, loss_ctc=84.714, loss_att=54.294, acc=0.723, loss=63.420, backward_time=1.030, grad_norm=101.845, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.718e-05, train_time=2.720
+[gpub005:0/64] 2023-07-09 04:24:22,311 (trainer:732) INFO: 29epoch:train:7001-7100batch: iter_time=1.207e-04, forward_time=0.146, loss_ctc=69.271, loss_att=47.599, acc=0.712, loss=54.101, backward_time=1.026, grad_norm=120.928, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.717e-05, train_time=2.716
+[gpub005:0/64] 2023-07-09 04:26:38,042 (trainer:732) INFO: 29epoch:train:7101-7200batch: iter_time=1.078e-04, forward_time=0.145, loss_ctc=69.513, loss_att=54.422, acc=0.710, loss=58.949, backward_time=1.027, grad_norm=110.610, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.716e-05, train_time=2.714
+[gpub005:0/64] 2023-07-09 04:28:54,309 (trainer:732) INFO: 29epoch:train:7201-7300batch: iter_time=1.170e-04, forward_time=0.147, loss_ctc=72.193, loss_att=57.718, acc=0.709, loss=62.061, backward_time=1.030, grad_norm=102.662, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.715e-05, train_time=2.725
+[gpub005:0/64] 2023-07-09 04:31:09,724 (trainer:732) INFO: 29epoch:train:7301-7400batch: iter_time=1.112e-04, forward_time=0.146, loss_ctc=66.020, loss_att=51.633, acc=0.701, loss=55.949, backward_time=1.025, grad_norm=97.883, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.713e-05, train_time=2.708
+[gpub005:0/64] 2023-07-09 04:33:26,175 (trainer:732) INFO: 29epoch:train:7401-7500batch: iter_time=1.074e-04, forward_time=0.147, loss_ctc=62.895, loss_att=47.361, acc=0.711, loss=52.021, backward_time=1.027, grad_norm=95.711, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.712e-05, train_time=2.729
+[gpub005:0/64] 2023-07-09 04:33:28,013 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub005:0/64] 2023-07-09 04:33:46,250 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-09 04:33:49,686 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-09 04:33:49,686 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub005:0/64] 2023-07-09 04:33:49,720 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-09 04:39:35,421 (trainer:732) INFO: 29epoch:train:7501-7600batch: iter_time=1.272, forward_time=0.187, loss_ctc=73.581, loss_att=59.664, acc=0.699, loss=63.839, backward_time=1.051, grad_norm=123.090, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.185, optim0_lr0=6.711e-05, train_time=7.385 +[gpub005:0/64] 2023-07-09 04:41:51,424 (trainer:732) INFO: 29epoch:train:7601-7700batch: iter_time=1.149e-04, forward_time=0.147, loss_ctc=66.484, loss_att=50.773, acc=0.706, loss=55.486, backward_time=1.027, grad_norm=101.249, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.710e-05, train_time=2.720 +[gpub005:0/64] 2023-07-09 04:44:13,765 (trainer:732) INFO: 29epoch:train:7701-7800batch: iter_time=1.341e-04, forward_time=0.146, loss_ctc=84.648, loss_att=54.865, acc=0.710, loss=63.799, backward_time=1.032, grad_norm=114.104, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.709e-05, train_time=2.847 +[gpub005:0/64] 2023-07-09 04:46:32,054 (trainer:732) INFO: 29epoch:train:7801-7900batch: iter_time=1.111e-04, forward_time=0.147, loss_ctc=69.048, loss_att=47.959, acc=0.715, loss=54.286, backward_time=1.040, grad_norm=97.389, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.707e-05, train_time=2.766 +[gpub005:0/64] 2023-07-09 04:48:49,620 (trainer:732) INFO: 29epoch:train:7901-8000batch: iter_time=1.106e-04, forward_time=0.147, loss_ctc=72.274, loss_att=55.314, acc=0.692, loss=60.402, backward_time=1.028, grad_norm=137.366, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.706e-05, train_time=2.751 +[gpub005:0/64] 2023-07-09 04:51:28,616 (trainer:732) INFO: 29epoch:train:8001-8100batch: iter_time=1.309e-04, forward_time=0.147, loss_ctc=69.384, loss_att=53.865, acc=0.708, loss=58.521, backward_time=1.057, grad_norm=96.316, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.705e-05, train_time=3.180 +[gpub005:0/64] 2023-07-09 04:53:45,027 (trainer:732) INFO: 29epoch:train:8101-8200batch: iter_time=1.360e-04, forward_time=0.147, loss_ctc=69.389, loss_att=54.583, acc=0.692, loss=59.025, backward_time=1.030, grad_norm=99.264, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.704e-05, train_time=2.728 +[gpub005:0/64] 2023-07-09 04:56:00,319 (trainer:732) INFO: 29epoch:train:8201-8300batch: iter_time=1.142e-04, forward_time=0.143, loss_ctc=58.213, loss_att=41.467, acc=0.724, loss=46.491, backward_time=1.023, grad_norm=83.113, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, 
optim0_lr0=6.702e-05, train_time=2.706 +[gpub005:0/64] 2023-07-09 04:56:51,431 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub005:0/64] 2023-07-09 04:57:09,723 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-09 04:57:13,145 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-09 04:57:13,145 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub005:0/64] 2023-07-09 04:57:13,152 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-09 05:01:29,095 (trainer:732) INFO: 29epoch:train:8301-8400batch: iter_time=1.321, forward_time=0.146, loss_ctc=77.823, loss_att=61.892, acc=0.697, loss=66.671, backward_time=1.041, grad_norm=131.315, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.701e-05, train_time=6.575 +[gpub005:0/64] 2023-07-09 05:03:46,371 (trainer:732) INFO: 29epoch:train:8401-8500batch: iter_time=1.200e-04, forward_time=0.146, loss_ctc=62.357, loss_att=49.168, acc=0.702, loss=53.124, backward_time=1.028, grad_norm=114.769, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.700e-05, train_time=2.745 +[gpub005:0/64] 2023-07-09 05:06:02,208 (trainer:732) INFO: 29epoch:train:8501-8600batch: iter_time=1.148e-04, forward_time=0.146, loss_ctc=76.933, loss_att=55.223, acc=0.716, loss=61.736, backward_time=1.029, grad_norm=107.873, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.699e-05, train_time=2.717 +[gpub005:0/64] 2023-07-09 05:08:17,804 (trainer:732) INFO: 29epoch:train:8601-8700batch: iter_time=1.178e-04, forward_time=0.145, loss_ctc=75.982, loss_att=45.898, acc=0.712, loss=54.923, backward_time=1.026, grad_norm=112.216, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.698e-05, train_time=2.712 +[gpub005:0/64] 2023-07-09 05:10:33,845 (trainer:732) INFO: 29epoch:train:8701-8800batch: iter_time=1.255e-04, forward_time=0.147, loss_ctc=74.836, loss_att=57.671, acc=0.698, loss=62.820, backward_time=1.029, grad_norm=99.814, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.696e-05, train_time=2.721 +[gpub005:0/64] 2023-07-09 05:12:49,571 (trainer:732) INFO: 29epoch:train:8801-8900batch: iter_time=1.217e-04, forward_time=0.146, loss_ctc=72.233, loss_att=54.973, acc=0.705, loss=60.151, backward_time=1.027, grad_norm=95.784, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.695e-05, train_time=2.714 +[gpub005:0/64] 2023-07-09 05:15:05,314 (trainer:732) INFO: 29epoch:train:8901-9000batch: iter_time=1.143e-04, forward_time=0.146, loss_ctc=67.982, loss_att=52.959, acc=0.698, loss=57.466, backward_time=1.026, grad_norm=94.407, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.694e-05, train_time=2.715 +[gpub005:0/64] 2023-07-09 05:17:20,925 (trainer:732) INFO: 29epoch:train:9001-9100batch: iter_time=1.145e-04, forward_time=0.145, loss_ctc=63.000, loss_att=48.975, 
acc=0.703, loss=53.183, backward_time=1.027, grad_norm=93.871, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.693e-05, train_time=2.712 +[gpub005:0/64] 2023-07-09 05:18:51,733 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub005:0/64] 2023-07-09 05:19:10,033 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-09 05:19:13,693 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-09 05:19:13,693 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub005:0/64] 2023-07-09 05:19:13,699 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-09 05:22:38,222 (trainer:732) INFO: 29epoch:train:9101-9200batch: iter_time=1.288, forward_time=0.145, loss_ctc=63.360, loss_att=48.019, acc=0.710, loss=52.621, backward_time=1.038, grad_norm=99.747, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.692e-05, train_time=6.346 +[gpub005:0/64] 2023-07-09 05:24:58,949 (trainer:732) INFO: 29epoch:train:9201-9300batch: iter_time=1.223e-04, forward_time=0.146, loss_ctc=62.760, loss_att=47.716, acc=0.709, loss=52.229, backward_time=1.046, grad_norm=108.849, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.690e-05, train_time=2.814 +[gpub005:0/64] 2023-07-09 05:27:21,659 (trainer:732) INFO: 29epoch:train:9301-9400batch: iter_time=1.041e-04, forward_time=0.146, loss_ctc=74.976, loss_att=52.987, acc=0.714, loss=59.583, backward_time=1.067, grad_norm=117.382, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.689e-05, train_time=2.854 +[gpub005:0/64] 2023-07-09 05:29:40,539 (trainer:732) INFO: 29epoch:train:9401-9500batch: iter_time=1.213e-04, forward_time=0.145, loss_ctc=76.575, loss_att=47.980, acc=0.712, loss=56.559, backward_time=1.030, grad_norm=107.961, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.688e-05, train_time=2.777 +[gpub005:0/64] 2023-07-09 05:31:56,436 (trainer:732) INFO: 29epoch:train:9501-9600batch: iter_time=1.186e-04, forward_time=0.146, loss_ctc=70.340, loss_att=56.056, acc=0.697, loss=60.341, backward_time=1.027, grad_norm=106.273, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.687e-05, train_time=2.718 +[gpub005:0/64] 2023-07-09 05:34:12,843 (trainer:732) INFO: 29epoch:train:9601-9700batch: iter_time=1.046e-04, forward_time=0.146, loss_ctc=71.444, loss_att=54.036, acc=0.701, loss=59.259, backward_time=1.028, grad_norm=99.944, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.686e-05, train_time=2.728 +[gpub005:0/64] 2023-07-09 05:36:29,650 (trainer:732) INFO: 29epoch:train:9701-9800batch: iter_time=1.158e-04, forward_time=0.152, loss_ctc=68.773, loss_att=52.287, acc=0.698, loss=57.233, backward_time=1.027, grad_norm=110.662, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.684e-05, train_time=2.736 +[gpub005:0/64] 2023-07-09 05:38:48,684 
(trainer:732) INFO: 29epoch:train:9801-9900batch: iter_time=1.102e-04, forward_time=0.146, loss_ctc=64.624, loss_att=52.273, acc=0.696, loss=55.979, backward_time=1.028, grad_norm=103.082, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.683e-05, train_time=2.780 +[gpub005:0/64] 2023-07-09 05:41:10,992 (trainer:732) INFO: 29epoch:train:9901-10000batch: iter_time=1.091e-04, forward_time=0.145, loss_ctc=67.638, loss_att=54.649, acc=0.699, loss=58.546, backward_time=1.032, grad_norm=92.414, clip=100.000, loss_scale=3.869e+25, optim_step_time=0.183, optim0_lr0=6.682e-05, train_time=2.846 +[gpub005:0/64] 2023-07-09 05:55:00,842 (trainer:338) INFO: 29epoch results: [train] iter_time=0.204, forward_time=0.147, loss_ctc=70.995, loss_att=53.088, acc=0.704, loss=58.460, backward_time=1.033, grad_norm=107.676, clip=100.000, loss_scale=2.515e+25, optim_step_time=0.183, optim0_lr0=6.742e-05, train_time=3.358, time=4 hours, 40 minutes and 6.74 seconds, total_count=260000, gpu_max_cached_mem_GB=38.234, [valid] loss_ctc=48.134, cer_ctc=0.273, loss_att=39.724, acc=0.683, cer=0.360, wer=0.992, loss=42.247, time=7 minutes and 46.79 seconds, total_count=26818, gpu_max_cached_mem_GB=38.234, [att_plot] time=5 minutes and 49.35 seconds, total_count=0, gpu_max_cached_mem_GB=38.234 +[gpub005:0/64] 2023-07-09 05:55:19,622 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub005:0/64] 2023-07-09 05:55:19,855 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/18epoch.pth +[gpub005:0/64] 2023-07-09 05:55:19,907 (trainer:272) INFO: 30/30epoch started. Estimated time to finish: 4 hours, 49 minutes and 24.9 seconds +[gpub005:0/64] 2023-07-09 05:55:21,146 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub005:0/64] 2023-07-09 05:55:40,193 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-09 05:55:43,757 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-09 05:55:43,757 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub005:0/64] 2023-07-09 05:55:43,848 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-09 06:02:34,500 (trainer:732) INFO: 30epoch:train:1-100batch: iter_time=2.918, forward_time=0.176, loss_ctc=81.654, loss_att=67.808, acc=0.681, loss=71.962, backward_time=1.043, grad_norm=106.248, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.185, optim0_lr0=6.681e-05, train_time=8.680 +[gpub005:0/64] 2023-07-09 06:04:53,428 (trainer:732) INFO: 30epoch:train:101-200batch: iter_time=1.164e-04, forward_time=0.145, loss_ctc=70.159, loss_att=56.442, acc=0.698, loss=60.557, backward_time=1.033, grad_norm=116.749, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.680e-05, train_time=2.778 +[gpub005:0/64] 2023-07-09 06:07:19,263 (trainer:732) INFO: 30epoch:train:201-300batch: iter_time=1.039e-04, forward_time=0.144, loss_ctc=75.061, loss_att=51.507, acc=0.692, loss=58.573, backward_time=1.042, grad_norm=111.019, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.679e-05, train_time=2.916 +[gpub005:0/64] 2023-07-09 06:09:35,049 (trainer:732) INFO: 30epoch:train:301-400batch: iter_time=1.042e-04, forward_time=0.144, loss_ctc=69.663, loss_att=53.432, acc=0.703, loss=58.301, backward_time=1.029, grad_norm=130.261, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.677e-05, train_time=2.716 +[gpub005:0/64] 2023-07-09 06:11:56,677 (trainer:732) INFO: 30epoch:train:401-500batch: iter_time=1.208e-04, forward_time=0.144, loss_ctc=79.996, loss_att=60.102, acc=0.682, loss=66.070, backward_time=1.054, grad_norm=137.978, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.676e-05, train_time=2.832 +[gpub005:0/64] 2023-07-09 06:14:12,035 (trainer:732) INFO: 30epoch:train:501-600batch: iter_time=1.345e-04, forward_time=0.144, loss_ctc=75.063, loss_att=53.414, acc=0.695, loss=59.909, backward_time=1.026, grad_norm=97.590, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.675e-05, train_time=2.707 +[gpub005:0/64] 2023-07-09 06:16:30,642 (trainer:732) INFO: 30epoch:train:601-700batch: iter_time=1.235e-04, forward_time=0.145, loss_ctc=72.761, loss_att=55.961, acc=0.699, loss=61.001, backward_time=1.029, grad_norm=115.744, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.674e-05, train_time=2.772 +[gpub005:0/64] 2023-07-09 06:18:50,359 (trainer:732) INFO: 30epoch:train:701-800batch: iter_time=1.281e-04, forward_time=0.145, loss_ctc=78.518, loss_att=61.887, acc=0.700, loss=66.876, backward_time=1.035, grad_norm=107.789, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.673e-05, 
train_time=2.794 +[gpub005:0/64] 2023-07-09 06:19:41,326 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub005:0/64] 2023-07-09 06:19:59,097 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-09 06:20:02,459 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-09 06:20:02,459 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub005:0/64] 2023-07-09 06:20:02,465 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-09 06:24:30,746 (trainer:732) INFO: 30epoch:train:801-900batch: iter_time=1.428, forward_time=0.152, loss_ctc=78.646, loss_att=64.512, acc=0.695, loss=68.752, backward_time=1.044, grad_norm=116.800, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.671e-05, train_time=6.808 +[gpub005:0/64] 2023-07-09 06:26:47,705 (trainer:732) INFO: 30epoch:train:901-1000batch: iter_time=1.117e-04, forward_time=0.145, loss_ctc=70.264, loss_att=56.691, acc=0.706, loss=60.763, backward_time=1.030, grad_norm=100.914, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.670e-05, train_time=2.739 +[gpub005:0/64] 2023-07-09 06:29:04,083 (trainer:732) INFO: 30epoch:train:1001-1100batch: iter_time=1.106e-04, forward_time=0.145, loss_ctc=72.147, loss_att=55.265, acc=0.704, loss=60.330, backward_time=1.028, grad_norm=115.205, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.669e-05, train_time=2.727 +[gpub005:0/64] 2023-07-09 06:31:19,765 (trainer:732) INFO: 30epoch:train:1101-1200batch: iter_time=1.295e-04, forward_time=0.145, loss_ctc=71.106, loss_att=51.456, acc=0.709, loss=57.351, backward_time=1.027, grad_norm=110.706, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.668e-05, train_time=2.713 +[gpub005:0/64] 2023-07-09 06:33:35,674 (trainer:732) INFO: 30epoch:train:1201-1300batch: iter_time=1.348e-04, forward_time=0.145, loss_ctc=73.058, loss_att=55.161, acc=0.705, loss=60.531, backward_time=1.028, grad_norm=104.330, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.667e-05, train_time=2.718 +[gpub005:0/64] 2023-07-09 06:35:51,423 (trainer:732) INFO: 30epoch:train:1301-1400batch: iter_time=1.195e-04, forward_time=0.146, loss_ctc=74.348, loss_att=54.096, acc=0.690, loss=60.172, backward_time=1.027, grad_norm=101.529, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.665e-05, train_time=2.715 +[gpub005:0/64] 2023-07-09 06:38:07,358 (trainer:732) INFO: 30epoch:train:1401-1500batch: iter_time=1.162e-04, forward_time=0.147, loss_ctc=66.464, loss_att=52.839, acc=0.721, loss=56.927, backward_time=1.028, grad_norm=90.007, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.664e-05, train_time=2.718 +[gpub005:0/64] 2023-07-09 06:40:22,816 (trainer:732) INFO: 30epoch:train:1501-1600batch: iter_time=1.294e-04, forward_time=0.145, loss_ctc=82.214, loss_att=60.679, acc=0.706, loss=67.140, 
backward_time=1.026, grad_norm=109.111, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.663e-05, train_time=2.709 +[gpub005:0/64] 2023-07-09 06:41:54,164 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub005:0/64] 2023-07-09 06:42:12,754 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-09 06:42:16,188 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-09 06:42:16,188 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub005:0/64] 2023-07-09 06:42:16,194 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-09 06:46:07,824 (trainer:732) INFO: 30epoch:train:1601-1700batch: iter_time=1.414, forward_time=0.155, loss_ctc=72.256, loss_att=57.905, acc=0.707, loss=62.210, backward_time=1.043, grad_norm=109.824, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.184, optim0_lr0=6.662e-05, train_time=6.900 +[gpub005:0/64] 2023-07-09 06:48:25,273 (trainer:732) INFO: 30epoch:train:1701-1800batch: iter_time=1.219e-04, forward_time=0.146, loss_ctc=71.125, loss_att=56.929, acc=0.694, loss=61.188, backward_time=1.037, grad_norm=93.078, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.661e-05, train_time=2.749 +[gpub005:0/64] 2023-07-09 06:50:41,031 (trainer:732) INFO: 30epoch:train:1801-1900batch: iter_time=1.278e-04, forward_time=0.146, loss_ctc=68.578, loss_att=54.880, acc=0.697, loss=58.990, backward_time=1.028, grad_norm=105.880, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.660e-05, train_time=2.715 +[gpub005:0/64] 2023-07-09 06:52:57,455 (trainer:732) INFO: 30epoch:train:1901-2000batch: iter_time=0.001, forward_time=0.148, loss_ctc=71.582, loss_att=53.088, acc=0.699, loss=58.636, backward_time=1.028, grad_norm=122.721, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.658e-05, train_time=2.728 +[gpub005:0/64] 2023-07-09 06:55:13,258 (trainer:732) INFO: 30epoch:train:2001-2100batch: iter_time=1.386e-04, forward_time=0.146, loss_ctc=73.218, loss_att=56.105, acc=0.697, loss=61.239, backward_time=1.028, grad_norm=100.719, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.657e-05, train_time=2.716 +[gpub005:0/64] 2023-07-09 06:57:30,072 (trainer:732) INFO: 30epoch:train:2101-2200batch: iter_time=6.216e-04, forward_time=0.150, loss_ctc=74.433, loss_att=53.049, acc=0.690, loss=59.464, backward_time=1.031, grad_norm=103.823, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.656e-05, train_time=2.734 +[gpub005:0/64] 2023-07-09 06:59:47,604 (trainer:732) INFO: 30epoch:train:2201-2300batch: iter_time=6.988e-04, forward_time=0.161, loss_ctc=68.122, loss_att=55.081, acc=0.701, loss=58.993, backward_time=1.028, grad_norm=102.516, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.184, optim0_lr0=6.655e-05, train_time=2.752 +[gpub005:0/64] 2023-07-09 07:02:03,279 (trainer:732) INFO: 
30epoch:train:2301-2400batch: iter_time=9.358e-04, forward_time=0.146, loss_ctc=81.052, loss_att=59.227, acc=0.699, loss=65.775, backward_time=1.027, grad_norm=123.594, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.654e-05, train_time=2.713 +[gpub005:0/64] 2023-07-09 07:04:18,965 (trainer:732) INFO: 30epoch:train:2401-2500batch: iter_time=1.129e-04, forward_time=0.146, loss_ctc=70.548, loss_att=54.052, acc=0.711, loss=59.001, backward_time=1.027, grad_norm=110.402, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.652e-05, train_time=2.713 +[gpub005:0/64] 2023-07-09 07:04:24,292 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub005:0/64] 2023-07-09 07:04:42,292 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-09 07:04:45,699 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-09 07:04:45,699 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub005:0/64] 2023-07-09 07:04:45,777 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-09 07:11:21,417 (trainer:732) INFO: 30epoch:train:2501-2600batch: iter_time=1.621, forward_time=0.147, loss_ctc=77.350, loss_att=59.850, acc=0.713, loss=65.100, backward_time=1.046, grad_norm=102.810, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.651e-05, train_time=8.449 +[gpub005:0/64] 2023-07-09 07:13:37,716 (trainer:732) INFO: 30epoch:train:2601-2700batch: iter_time=1.199e-04, forward_time=0.147, loss_ctc=64.507, loss_att=51.570, acc=0.715, loss=55.451, backward_time=1.029, grad_norm=104.183, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.650e-05, train_time=2.726 +[gpub005:0/64] 2023-07-09 07:15:54,008 (trainer:732) INFO: 30epoch:train:2701-2800batch: iter_time=1.175e-04, forward_time=0.145, loss_ctc=75.266, loss_att=54.346, acc=0.696, loss=60.622, backward_time=1.032, grad_norm=108.024, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.649e-05, train_time=2.726 +[gpub005:0/64] 2023-07-09 07:18:09,966 (trainer:732) INFO: 30epoch:train:2801-2900batch: iter_time=1.213e-04, forward_time=0.147, loss_ctc=68.932, loss_att=47.991, acc=0.727, loss=54.273, backward_time=1.030, grad_norm=91.719, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.648e-05, train_time=2.719 +[gpub005:0/64] 2023-07-09 07:20:25,937 (trainer:732) INFO: 30epoch:train:2901-3000batch: iter_time=1.119e-04, forward_time=0.147, loss_ctc=76.108, loss_att=56.964, acc=0.689, loss=62.707, backward_time=1.030, grad_norm=111.677, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.647e-05, train_time=2.719 +[gpub005:0/64] 2023-07-09 07:22:41,672 (trainer:732) INFO: 30epoch:train:3001-3100batch: iter_time=1.222e-04, forward_time=0.145, loss_ctc=68.551, loss_att=47.543, acc=0.711, loss=53.845, backward_time=1.028, grad_norm=94.809, clip=100.000, loss_scale=7.737e+25, 
optim_step_time=0.183, optim0_lr0=6.645e-05, train_time=2.714 +[gpub005:0/64] 2023-07-09 07:24:57,391 (trainer:732) INFO: 30epoch:train:3101-3200batch: iter_time=1.156e-04, forward_time=0.146, loss_ctc=76.505, loss_att=59.918, acc=0.709, loss=64.894, backward_time=1.028, grad_norm=102.831, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.644e-05, train_time=2.714 +[gpub005:0/64] 2023-07-09 07:27:13,353 (trainer:732) INFO: 30epoch:train:3201-3300batch: iter_time=1.116e-04, forward_time=0.146, loss_ctc=74.867, loss_att=58.218, acc=0.713, loss=63.213, backward_time=1.029, grad_norm=165.724, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.643e-05, train_time=2.719 +[gpub005:0/64] 2023-07-09 07:28:00,817 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub005:0/64] 2023-07-09 07:28:18,947 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-09 07:28:22,591 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-09 07:28:22,591 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub005:0/64] 2023-07-09 07:28:22,597 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-09 07:34:22,922 (trainer:732) INFO: 30epoch:train:3301-3400batch: iter_time=1.234, forward_time=0.170, loss_ctc=80.726, loss_att=62.733, acc=0.711, loss=68.131, backward_time=1.040, grad_norm=125.085, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.185, optim0_lr0=6.642e-05, train_time=8.591 +[gpub005:0/64] 2023-07-09 07:36:40,744 (trainer:732) INFO: 30epoch:train:3401-3500batch: iter_time=1.170e-04, forward_time=0.147, loss_ctc=69.925, loss_att=55.881, acc=0.710, loss=60.094, backward_time=1.036, grad_norm=119.662, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.641e-05, train_time=2.756 +[gpub005:0/64] 2023-07-09 07:38:56,449 (trainer:732) INFO: 30epoch:train:3501-3600batch: iter_time=1.146e-04, forward_time=0.146, loss_ctc=69.586, loss_att=53.200, acc=0.709, loss=58.116, backward_time=1.029, grad_norm=123.761, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.640e-05, train_time=2.714 +[gpub005:0/64] 2023-07-09 07:41:13,578 (trainer:732) INFO: 30epoch:train:3601-3700batch: iter_time=1.139e-04, forward_time=0.146, loss_ctc=71.179, loss_att=51.608, acc=0.710, loss=57.480, backward_time=1.027, grad_norm=100.747, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.638e-05, train_time=2.742 +[gpub005:0/64] 2023-07-09 07:43:30,037 (trainer:732) INFO: 30epoch:train:3701-3800batch: iter_time=1.192e-04, forward_time=0.146, loss_ctc=71.628, loss_att=52.400, acc=0.715, loss=58.169, backward_time=1.029, grad_norm=96.842, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.637e-05, train_time=2.729 +[gpub005:0/64] 2023-07-09 07:45:45,881 (trainer:732) INFO: 30epoch:train:3801-3900batch: iter_time=1.200e-04, forward_time=0.147, 
loss_ctc=72.724, loss_att=52.528, acc=0.693, loss=58.587, backward_time=1.028, grad_norm=108.896, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.636e-05, train_time=2.717 +[gpub005:0/64] 2023-07-09 07:48:11,019 (trainer:732) INFO: 30epoch:train:3901-4000batch: iter_time=1.153e-04, forward_time=0.147, loss_ctc=70.380, loss_att=54.615, acc=0.715, loss=59.344, backward_time=1.089, grad_norm=115.084, clip=100.000, loss_scale=7.737e+25, optim_step_time=0.183, optim0_lr0=6.635e-05, train_time=2.903 +[gpub005:0/64] 2023-07-09 07:50:29,931 (trainer:732) INFO: 30epoch:train:4001-4100batch: iter_time=1.231e-04, forward_time=0.147, loss_ctc=79.491, loss_att=57.454, acc=0.715, loss=64.065, backward_time=1.034, grad_norm=115.797, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.634e-05, train_time=2.778 +[gpub005:0/64] 2023-07-09 07:52:17,245 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub005:0/64] 2023-07-09 07:52:35,320 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-09 07:52:38,875 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-09 07:52:38,875 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub005:0/64] 2023-07-09 07:52:38,881 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-09 07:57:13,828 (trainer:732) INFO: 30epoch:train:4101-4200batch: iter_time=1.808, forward_time=0.172, loss_ctc=73.203, loss_att=63.564, acc=0.703, loss=66.455, backward_time=1.059, grad_norm=135.634, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.184, optim0_lr0=6.633e-05, train_time=8.078 +[gpub005:0/64] 2023-07-09 07:59:30,354 (trainer:732) INFO: 30epoch:train:4201-4300batch: iter_time=1.169e-04, forward_time=0.148, loss_ctc=77.444, loss_att=60.785, acc=0.709, loss=65.783, backward_time=1.032, grad_norm=116.536, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.631e-05, train_time=2.730 +[gpub005:0/64] 2023-07-09 08:01:50,592 (trainer:732) INFO: 30epoch:train:4301-4400batch: iter_time=1.215e-04, forward_time=0.146, loss_ctc=64.714, loss_att=52.178, acc=0.713, loss=55.939, backward_time=1.030, grad_norm=93.861, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.630e-05, train_time=2.805 +[gpub005:0/64] 2023-07-09 08:04:18,788 (trainer:732) INFO: 30epoch:train:4401-4500batch: iter_time=1.166e-04, forward_time=0.147, loss_ctc=74.033, loss_att=54.299, acc=0.707, loss=60.219, backward_time=1.056, grad_norm=102.941, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.629e-05, train_time=2.964 +[gpub005:0/64] 2023-07-09 08:06:38,566 (trainer:732) INFO: 30epoch:train:4501-4600batch: iter_time=1.187e-04, forward_time=0.144, loss_ctc=68.971, loss_att=48.825, acc=0.724, loss=54.868, backward_time=1.033, grad_norm=97.902, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.628e-05, train_time=2.795 
+[gpub005:0/64] 2023-07-09 08:09:01,583 (trainer:732) INFO: 30epoch:train:4601-4700batch: iter_time=1.106e-04, forward_time=0.144, loss_ctc=74.694, loss_att=55.882, acc=0.686, loss=61.525, backward_time=1.036, grad_norm=154.070, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.627e-05, train_time=2.860 +[gpub005:0/64] 2023-07-09 08:11:17,207 (trainer:732) INFO: 30epoch:train:4701-4800batch: iter_time=1.118e-04, forward_time=0.145, loss_ctc=69.389, loss_att=49.217, acc=0.709, loss=55.269, backward_time=1.024, grad_norm=102.401, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.626e-05, train_time=2.712 +[gpub005:0/64] 2023-07-09 08:13:34,842 (trainer:732) INFO: 30epoch:train:4801-4900batch: iter_time=1.201e-04, forward_time=0.146, loss_ctc=76.215, loss_att=58.985, acc=0.716, loss=64.154, backward_time=1.029, grad_norm=102.975, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.624e-05, train_time=2.752 +[gpub005:0/64] 2023-07-09 08:15:50,733 (trainer:732) INFO: 30epoch:train:4901-5000batch: iter_time=1.189e-04, forward_time=0.147, loss_ctc=73.406, loss_att=57.659, acc=0.715, loss=62.383, backward_time=1.029, grad_norm=107.677, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.623e-05, train_time=2.718 +[gpub005:0/64] 2023-07-09 08:15:55,286 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub005:0/64] 2023-07-09 08:16:13,554 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-09 08:16:17,414 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-09 08:16:17,414 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub005:0/64] 2023-07-09 08:16:17,420 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-09 08:21:39,635 (trainer:732) INFO: 30epoch:train:5001-5100batch: iter_time=1.348, forward_time=0.148, loss_ctc=79.858, loss_att=65.978, acc=0.708, loss=70.142, backward_time=1.055, grad_norm=110.794, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.622e-05, train_time=6.978 +[gpub005:0/64] 2023-07-09 08:24:02,275 (trainer:732) INFO: 30epoch:train:5101-5200batch: iter_time=1.025e-04, forward_time=0.147, loss_ctc=68.121, loss_att=55.090, acc=0.713, loss=58.999, backward_time=1.036, grad_norm=97.120, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.621e-05, train_time=2.853 +[gpub005:0/64] 2023-07-09 08:26:17,891 (trainer:732) INFO: 30epoch:train:5201-5300batch: iter_time=1.282e-04, forward_time=0.146, loss_ctc=67.439, loss_att=45.809, acc=0.718, loss=52.298, backward_time=1.026, grad_norm=94.714, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.620e-05, train_time=2.712 +[gpub005:0/64] 2023-07-09 08:28:33,694 (trainer:732) INFO: 30epoch:train:5301-5400batch: iter_time=1.259e-04, forward_time=0.146, loss_ctc=65.888, loss_att=50.949, acc=0.715, loss=55.431, 
backward_time=1.027, grad_norm=105.113, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.619e-05, train_time=2.716 +[gpub005:0/64] 2023-07-09 08:30:49,534 (trainer:732) INFO: 30epoch:train:5401-5500batch: iter_time=1.314e-04, forward_time=0.146, loss_ctc=75.721, loss_att=55.905, acc=0.700, loss=61.850, backward_time=1.027, grad_norm=107.472, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.617e-05, train_time=2.717 +[gpub005:0/64] 2023-07-09 08:33:06,008 (trainer:732) INFO: 30epoch:train:5501-5600batch: iter_time=1.142e-04, forward_time=0.147, loss_ctc=72.755, loss_att=50.694, acc=0.711, loss=57.312, backward_time=1.028, grad_norm=108.338, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.616e-05, train_time=2.729 +[gpub005:0/64] 2023-07-09 08:35:21,994 (trainer:732) INFO: 30epoch:train:5601-5700batch: iter_time=9.944e-05, forward_time=0.147, loss_ctc=71.018, loss_att=55.206, acc=0.714, loss=59.950, backward_time=1.029, grad_norm=106.123, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.615e-05, train_time=2.719 +[gpub005:0/64] 2023-07-09 08:37:37,976 (trainer:732) INFO: 30epoch:train:5701-5800batch: iter_time=1.133e-04, forward_time=0.146, loss_ctc=75.038, loss_att=60.040, acc=0.717, loss=64.539, backward_time=1.029, grad_norm=119.425, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.614e-05, train_time=2.719 +[gpub005:0/64] 2023-07-09 08:38:26,516 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub005:0/64] 2023-07-09 08:38:44,159 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-09 08:38:47,519 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-09 08:38:47,520 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub005:0/64] 2023-07-09 08:38:47,581 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-09 08:44:06,011 (trainer:732) INFO: 30epoch:train:5801-5900batch: iter_time=1.453, forward_time=0.147, loss_ctc=74.094, loss_att=57.930, acc=0.715, loss=62.779, backward_time=1.047, grad_norm=106.855, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.613e-05, train_time=7.760 +[gpub005:0/64] 2023-07-09 08:46:23,423 (trainer:732) INFO: 30epoch:train:5901-6000batch: iter_time=1.069e-04, forward_time=0.146, loss_ctc=66.844, loss_att=52.858, acc=0.715, loss=57.054, backward_time=1.032, grad_norm=103.900, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.612e-05, train_time=2.748 +[gpub005:0/64] 2023-07-09 08:48:41,143 (trainer:732) INFO: 30epoch:train:6001-6100batch: iter_time=1.043e-04, forward_time=0.146, loss_ctc=65.513, loss_att=47.511, acc=0.713, loss=52.911, backward_time=1.033, grad_norm=110.804, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.610e-05, train_time=2.754 +[gpub005:0/64] 2023-07-09 08:50:57,547 (trainer:732) INFO: 
30epoch:train:6101-6200batch: iter_time=1.083e-04, forward_time=0.146, loss_ctc=66.188, loss_att=50.999, acc=0.714, loss=55.556, backward_time=1.029, grad_norm=98.669, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.609e-05, train_time=2.728 +[gpub005:0/64] 2023-07-09 08:53:13,408 (trainer:732) INFO: 30epoch:train:6201-6300batch: iter_time=1.085e-04, forward_time=0.146, loss_ctc=75.870, loss_att=56.326, acc=0.701, loss=62.189, backward_time=1.029, grad_norm=108.691, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.184, optim0_lr0=6.608e-05, train_time=2.717 +[gpub005:0/64] 2023-07-09 08:55:30,067 (trainer:732) INFO: 30epoch:train:6301-6400batch: iter_time=1.029e-04, forward_time=0.146, loss_ctc=72.916, loss_att=50.933, acc=0.708, loss=57.528, backward_time=1.028, grad_norm=107.518, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.607e-05, train_time=2.733 +[gpub005:0/64] 2023-07-09 08:57:48,148 (trainer:732) INFO: 30epoch:train:6401-6500batch: iter_time=1.077e-04, forward_time=0.146, loss_ctc=71.883, loss_att=55.390, acc=0.712, loss=60.338, backward_time=1.029, grad_norm=98.625, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.606e-05, train_time=2.761 +[gpub005:0/64] 2023-07-09 09:00:03,990 (trainer:732) INFO: 30epoch:train:6501-6600batch: iter_time=1.080e-04, forward_time=0.146, loss_ctc=74.721, loss_att=57.672, acc=0.714, loss=62.787, backward_time=1.029, grad_norm=99.654, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.605e-05, train_time=2.717 +[gpub005:0/64] 2023-07-09 09:01:37,401 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub005:0/64] 2023-07-09 09:01:55,924 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-09 09:01:59,360 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-09 09:01:59,360 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub005:0/64] 2023-07-09 09:01:59,366 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-09 09:05:31,034 (trainer:732) INFO: 30epoch:train:6601-6700batch: iter_time=1.189, forward_time=0.188, loss_ctc=75.246, loss_att=59.358, acc=0.716, loss=64.124, backward_time=1.048, grad_norm=107.046, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.185, optim0_lr0=6.604e-05, train_time=6.541 +[gpub005:0/64] 2023-07-09 09:07:47,651 (trainer:732) INFO: 30epoch:train:6701-6800batch: iter_time=1.161e-04, forward_time=0.145, loss_ctc=76.815, loss_att=61.302, acc=0.695, loss=65.956, backward_time=1.029, grad_norm=110.285, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.602e-05, train_time=2.732 +[gpub005:0/64] 2023-07-09 09:10:03,733 (trainer:732) INFO: 30epoch:train:6801-6900batch: iter_time=1.232e-04, forward_time=0.147, loss_ctc=63.395, loss_att=53.738, acc=0.705, loss=56.635, backward_time=1.029, grad_norm=93.495, clip=100.000, loss_scale=1.547e+26, 
optim_step_time=0.183, optim0_lr0=6.601e-05, train_time=2.721 +[gpub005:0/64] 2023-07-09 09:12:20,215 (trainer:732) INFO: 30epoch:train:6901-7000batch: iter_time=1.293e-04, forward_time=0.146, loss_ctc=73.005, loss_att=53.910, acc=0.703, loss=59.639, backward_time=1.029, grad_norm=116.643, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.600e-05, train_time=2.729 +[gpub005:0/64] 2023-07-09 09:14:35,852 (trainer:732) INFO: 30epoch:train:7001-7100batch: iter_time=1.414e-04, forward_time=0.146, loss_ctc=68.606, loss_att=50.078, acc=0.714, loss=55.637, backward_time=1.028, grad_norm=91.992, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.599e-05, train_time=2.713 +[gpub005:0/64] 2023-07-09 09:16:56,392 (trainer:732) INFO: 30epoch:train:7101-7200batch: iter_time=1.186e-04, forward_time=0.174, loss_ctc=72.330, loss_att=55.852, acc=0.689, loss=60.795, backward_time=1.029, grad_norm=103.971, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.184, optim0_lr0=6.598e-05, train_time=2.811 +[gpub005:0/64] 2023-07-09 09:19:14,156 (trainer:732) INFO: 30epoch:train:7201-7300batch: iter_time=1.131e-04, forward_time=0.161, loss_ctc=66.916, loss_att=49.747, acc=0.706, loss=54.898, backward_time=1.027, grad_norm=91.907, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.597e-05, train_time=2.754 +[gpub005:0/64] 2023-07-09 09:21:33,212 (trainer:732) INFO: 30epoch:train:7301-7400batch: iter_time=1.143e-04, forward_time=0.147, loss_ctc=74.799, loss_att=59.157, acc=0.709, loss=63.849, backward_time=1.031, grad_norm=114.878, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.595e-05, train_time=2.782 +[gpub005:0/64] 2023-07-09 09:23:48,998 (trainer:732) INFO: 30epoch:train:7401-7500batch: iter_time=1.147e-04, forward_time=0.146, loss_ctc=71.607, loss_att=57.464, acc=0.706, loss=61.707, backward_time=1.027, grad_norm=123.667, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.594e-05, train_time=2.715 +[gpub005:0/64] 2023-07-09 09:24:03,715 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub005:0/64] 2023-07-09 09:24:22,085 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-09 09:24:25,849 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-09 09:24:25,849 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub005:0/64] 2023-07-09 09:24:25,855 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-09 09:30:03,490 (trainer:732) INFO: 30epoch:train:7501-7600batch: iter_time=2.212, forward_time=0.145, loss_ctc=79.143, loss_att=65.331, acc=0.698, loss=69.474, backward_time=1.041, grad_norm=116.935, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.593e-05, train_time=7.490 +[gpub005:0/64] 2023-07-09 09:32:20,507 (trainer:732) INFO: 30epoch:train:7601-7700batch: iter_time=1.198e-04, forward_time=0.147, loss_ctc=68.209, loss_att=54.927, acc=0.710, loss=58.912, backward_time=1.029, grad_norm=98.114, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.592e-05, train_time=2.740 +[gpub005:0/64] 2023-07-09 09:34:37,364 (trainer:732) INFO: 30epoch:train:7701-7800batch: iter_time=1.241e-04, forward_time=0.145, loss_ctc=67.320, loss_att=47.178, acc=0.705, loss=53.221, backward_time=1.028, grad_norm=107.580, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.591e-05, train_time=2.737 +[gpub005:0/64] 2023-07-09 09:36:52,907 (trainer:732) INFO: 30epoch:train:7801-7900batch: iter_time=1.219e-04, forward_time=0.145, loss_ctc=65.382, loss_att=51.048, acc=0.714, loss=55.348, backward_time=1.025, grad_norm=94.448, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.590e-05, train_time=2.711 +[gpub005:0/64] 2023-07-09 09:39:08,490 (trainer:732) INFO: 30epoch:train:7901-8000batch: iter_time=1.221e-04, forward_time=0.145, loss_ctc=74.717, loss_att=57.283, acc=0.693, loss=62.513, backward_time=1.026, grad_norm=109.439, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.589e-05, train_time=2.711 +[gpub005:0/64] 2023-07-09 09:41:24,030 (trainer:732) INFO: 30epoch:train:8001-8100batch: iter_time=1.290e-04, forward_time=0.145, loss_ctc=72.670, loss_att=51.262, acc=0.705, loss=57.684, backward_time=1.025, grad_norm=114.707, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.182, optim0_lr0=6.587e-05, train_time=2.711 +[gpub005:0/64] 2023-07-09 09:43:40,044 (trainer:732) INFO: 30epoch:train:8101-8200batch: iter_time=1.192e-04, forward_time=0.146, loss_ctc=71.182, loss_att=53.993, acc=0.710, loss=59.150, backward_time=1.030, grad_norm=103.073, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.586e-05, train_time=2.720 +[gpub005:0/64] 2023-07-09 09:45:55,894 (trainer:732) INFO: 30epoch:train:8201-8300batch: iter_time=1.143e-04, forward_time=0.146, loss_ctc=75.740, loss_att=60.546, acc=0.707, loss=65.104, backward_time=1.028, grad_norm=105.365, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, 
optim0_lr0=6.585e-05, train_time=2.717 +[gpub005:0/64] 2023-07-09 09:46:42,025 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub005:0/64] 2023-07-09 09:47:00,609 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-09 09:47:04,055 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-09 09:47:04,055 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub005:0/64] 2023-07-09 09:47:04,061 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-09 09:52:57,152 (trainer:732) INFO: 30epoch:train:8301-8400batch: iter_time=1.228, forward_time=0.146, loss_ctc=77.226, loss_att=61.263, acc=0.697, loss=66.052, backward_time=1.037, grad_norm=105.579, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.584e-05, train_time=8.425 +[gpub005:0/64] 2023-07-09 09:55:13,828 (trainer:732) INFO: 30epoch:train:8401-8500batch: iter_time=1.160e-04, forward_time=0.145, loss_ctc=69.146, loss_att=55.329, acc=0.709, loss=59.474, backward_time=1.029, grad_norm=107.471, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.182, optim0_lr0=6.583e-05, train_time=2.733 +[gpub005:0/64] 2023-07-09 09:57:29,536 (trainer:732) INFO: 30epoch:train:8501-8600batch: iter_time=1.224e-04, forward_time=0.144, loss_ctc=66.513, loss_att=53.263, acc=0.703, loss=57.238, backward_time=1.025, grad_norm=96.505, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.582e-05, train_time=2.714 +[gpub005:0/64] 2023-07-09 09:59:45,955 (trainer:732) INFO: 30epoch:train:8601-8700batch: iter_time=1.298e-04, forward_time=0.146, loss_ctc=70.589, loss_att=51.866, acc=0.707, loss=57.483, backward_time=1.029, grad_norm=107.538, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.581e-05, train_time=2.728 +[gpub005:0/64] 2023-07-09 10:02:01,664 (trainer:732) INFO: 30epoch:train:8701-8800batch: iter_time=1.318e-04, forward_time=0.146, loss_ctc=70.315, loss_att=53.128, acc=0.709, loss=58.284, backward_time=1.029, grad_norm=101.284, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.579e-05, train_time=2.714 +[gpub005:0/64] 2023-07-09 10:04:17,103 (trainer:732) INFO: 30epoch:train:8801-8900batch: iter_time=1.157e-04, forward_time=0.145, loss_ctc=75.096, loss_att=52.904, acc=0.696, loss=59.561, backward_time=1.027, grad_norm=104.484, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.578e-05, train_time=2.709 +[gpub005:0/64] 2023-07-09 10:06:32,753 (trainer:732) INFO: 30epoch:train:8901-9000batch: iter_time=1.248e-04, forward_time=0.146, loss_ctc=67.620, loss_att=53.536, acc=0.711, loss=57.762, backward_time=1.028, grad_norm=97.983, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.577e-05, train_time=2.713 +[gpub005:0/64] 2023-07-09 10:08:48,415 (trainer:732) INFO: 30epoch:train:9001-9100batch: iter_time=1.166e-04, forward_time=0.146, loss_ctc=79.714, loss_att=58.394, 
acc=0.703, loss=64.790, backward_time=1.029, grad_norm=104.995, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.576e-05, train_time=2.713 +[gpub005:0/64] 2023-07-09 10:10:19,724 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub005:0/64] 2023-07-09 10:10:37,937 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub005:0/64] 2023-07-09 10:10:41,460 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub005:0/64] 2023-07-09 10:10:41,461 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub005:0/64] 2023-07-09 10:10:41,467 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub005:0/64] 2023-07-09 10:13:44,726 (trainer:732) INFO: 30epoch:train:9101-9200batch: iter_time=1.276, forward_time=0.172, loss_ctc=72.067, loss_att=56.062, acc=0.707, loss=60.863, backward_time=1.041, grad_norm=115.598, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.184, optim0_lr0=6.575e-05, train_time=5.926 +[gpub005:0/64] 2023-07-09 10:16:01,376 (trainer:732) INFO: 30epoch:train:9201-9300batch: iter_time=1.195e-04, forward_time=0.145, loss_ctc=70.122, loss_att=56.197, acc=0.700, loss=60.374, backward_time=1.030, grad_norm=113.875, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.574e-05, train_time=2.733 +[gpub005:0/64] 2023-07-09 10:18:17,221 (trainer:732) INFO: 30epoch:train:9301-9400batch: iter_time=1.348e-04, forward_time=0.145, loss_ctc=66.656, loss_att=53.634, acc=0.703, loss=57.541, backward_time=1.027, grad_norm=107.518, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.573e-05, train_time=2.717 +[gpub005:0/64] 2023-07-09 10:20:33,434 (trainer:732) INFO: 30epoch:train:9401-9500batch: iter_time=1.426e-04, forward_time=0.146, loss_ctc=70.982, loss_att=51.945, acc=0.706, loss=57.656, backward_time=1.028, grad_norm=103.989, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.572e-05, train_time=2.724 +[gpub005:0/64] 2023-07-09 10:22:49,713 (trainer:732) INFO: 30epoch:train:9501-9600batch: iter_time=1.236e-04, forward_time=0.146, loss_ctc=71.163, loss_att=53.531, acc=0.707, loss=58.820, backward_time=1.028, grad_norm=106.954, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.570e-05, train_time=2.725 +[gpub005:0/64] 2023-07-09 10:25:05,526 (trainer:732) INFO: 30epoch:train:9601-9700batch: iter_time=1.256e-04, forward_time=0.145, loss_ctc=72.003, loss_att=50.687, acc=0.699, loss=57.082, backward_time=1.026, grad_norm=95.981, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.569e-05, train_time=2.716 +[gpub005:0/64] 2023-07-09 10:27:21,391 (trainer:732) INFO: 30epoch:train:9701-9800batch: iter_time=1.100e-04, forward_time=0.145, loss_ctc=67.314, loss_att=53.367, acc=0.709, loss=57.551, backward_time=1.028, grad_norm=101.436, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.568e-05, train_time=2.717 +[gpub005:0/64] 2023-07-09 
10:29:37,738 (trainer:732) INFO: 30epoch:train:9801-9900batch: iter_time=1.103e-04, forward_time=0.144, loss_ctc=79.876, loss_att=57.750, acc=0.705, loss=64.387, backward_time=1.026, grad_norm=108.714, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.567e-05, train_time=2.727 +[gpub005:0/64] 2023-07-09 10:31:53,636 (trainer:732) INFO: 30epoch:train:9901-10000batch: iter_time=1.198e-04, forward_time=0.146, loss_ctc=69.642, loss_att=52.891, acc=0.711, loss=57.916, backward_time=1.026, grad_norm=127.204, clip=100.000, loss_scale=3.095e+26, optim_step_time=0.183, optim0_lr0=6.566e-05, train_time=2.718 +gpub030:2531971:2532059 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub018:1650755:1650841 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub012:1607819:1607909 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub014:1495254:1495339 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub014:1495256:1495336 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub030:2531969:2532058 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub030:2531970:2532060 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub096:1645785:1645863 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub097:1705871:1705965 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub013:1694053:1694137 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub013:1694056:1694139 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub013:1694055:1694136 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub018:1650753:1650840 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub098:1875740:1875815 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub098:1875738:1875816 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub098:1875741:1875817 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub012:1607821:1607907 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub012:1607818:1607910 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub014:1495255:1495338 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub014:1495257:1495337 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub097:1705870:1705966 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub030:2531972:2532057 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub018:1650756:1650842 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub096:1645786:1645864 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub096:1645787:1645865 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub096:1645784:1645866 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub012:1607820:1607908 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub018:1650755:1650755 [2] NCCL INFO comm 0x513374c0 rank 18 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub039:2093177:2093253 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub039:2093176:2093252 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub018:1650754:1650839 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub095:2520060:2520144 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub039:2093175:2093254 [0] NCCL INFO [Service thread] 
Connection closed by localRank 0 +gpub097:1705871:1705871 [3] NCCL INFO comm 0x94d2db0 rank 59 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub039:2093178:2093251 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub040:2093693:2093783 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub040:2093691:2093781 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub040:2093692:2093780 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub095:2520059:2520143 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub040:2093690:2093782 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub072:1805521:1805611 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub095:2520061:2520145 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub013:1694056:1694056 [3] NCCL INFO comm 0x8c00090 rank 11 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub098:1875739:1875818 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub095:2520062:2520146 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub072:1805522:1805612 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub097:1705868:1705963 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub097:1705869:1705964 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub072:1805519:1805613 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub005:2408154:2408234 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub072:1805520:1805610 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub098:1875739:1875739 [1] NCCL INFO comm 0x4ffeee90 rank 61 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub098:1875740:1875740 [2] NCCL INFO comm 0x8c9fbb0 rank 62 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub030:2531969:2531969 [0] NCCL INFO comm 0xb4f6de0 rank 20 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub013:1694055:1694055 [2] NCCL INFO comm 0xf6b9b10 rank 10 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub013:1694053:1694053 [0] NCCL INFO comm 0x8c6ae750 rank 8 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub096:1645785:1645785 [1] NCCL INFO comm 0x50f7e840 rank 53 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub097:1705870:1705870 [2] NCCL INFO comm 0x50f117a0 rank 58 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub012:1607820:1607820 [2] NCCL INFO comm 0x503f1430 rank 6 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub098:1875741:1875741 [3] NCCL INFO comm 0x4ecd4ee0 rank 63 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub072:1805521:1805521 [2] NCCL INFO comm 0x8d829e60 rank 42 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub096:1645786:1645786 [2] NCCL INFO comm 0x4fe9cb90 rank 54 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub039:2093177:2093177 [2] NCCL INFO comm 0xa965b10 rank 26 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub013:1694054:1694138 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub072:1805520:1805520 [1] NCCL INFO comm 0xb6f41780 rank 41 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub098:1875738:1875738 [0] NCCL INFO comm 0x9e5ca730 rank 60 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub030:2531972:2531972 [3] NCCL INFO comm 0xa2cf1d0 rank 23 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub018:1650753:1650753 [0] NCCL INFO comm 0x4f7a1b90 rank 16 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE 
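The 30epoch trainer records above print loss_ctc, loss_att, and a combined loss; the numbers are consistent with a hybrid CTC/attention objective loss = w*loss_ctc + (1-w)*loss_att with w = 0.3, though the weight is inferred from the logged values rather than stated in this log. A minimal sketch (hypothetical helper names) for checking that relation while scanning these records:

import re

# Trainer records in this log print loss_ctc, loss_att, and a combined loss.
# Assumption (inferred from the values, not stated in the log):
# loss = w * loss_ctc + (1 - w) * loss_att with w = 0.3.
RECORD = re.compile(r"loss_ctc=([\d.]+), loss_att=([\d.]+), acc=[\d.]+, loss=([\d.]+)")

def combined_loss_matches(line, w=0.3, tol=5e-3):
    """True if a trainer record's combined loss equals w*loss_ctc + (1-w)*loss_att."""
    m = RECORD.search(line)
    if m is None:
        return False  # not a trainer record
    loss_ctc, loss_att, loss = map(float, m.groups())
    return abs(w * loss_ctc + (1.0 - w) * loss_att - loss) < tol

# Example from the 9201-9300batch record above:
# 0.3 * 70.122 + 0.7 * 56.197 = 60.374, matching the logged loss=60.374.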
+gpub097:1705869:1705869 [1] NCCL INFO comm 0x8e89510 rank 57 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub018:1650756:1650756 [3] NCCL INFO comm 0x8c504da0 rank 19 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub012:1607818:1607818 [0] NCCL INFO comm 0xa8f3bc80 rank 4 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub072:1805519:1805519 [0] NCCL INFO comm 0x4fb13ad0 rank 40 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub005:2408152:2408235 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub005:2408153:2408237 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub030:2531970:2531970 [1] NCCL INFO comm 0x8ebc3340 rank 21 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub030:2531971:2531971 [2] NCCL INFO comm 0x8dd18cd0 rank 22 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub095:2520059:2520059 [0] NCCL INFO comm 0x15bb1c50 rank 48 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub039:2093175:2093175 [0] NCCL INFO comm 0xa2cab60 rank 24 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub012:1607819:1607819 [1] NCCL INFO comm 0xa4ee840 rank 5 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub072:1805522:1805522 [3] NCCL INFO comm 0x50740450 rank 43 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub005:2408154:2408154 [3] NCCL INFO comm 0x519a6580 rank 3 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub018:1650754:1650754 [1] NCCL INFO comm 0xa938d420 rank 17 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub039:2093176:2093176 [1] NCCL INFO comm 0xbcbaabd0 rank 25 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub095:2520061:2520061 [2] NCCL INFO comm 0x91b7930 rank 50 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub096:1645787:1645787 [3] NCCL INFO comm 0xb78b6390 rank 55 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub012:1607821:1607821 [3] NCCL INFO comm 0x516c3430 rank 7 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub039:2093178:2093178 [3] NCCL INFO comm 0x4fc75960 rank 27 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub005:2408153:2408153 [2] NCCL INFO comm 0x4fab6870 rank 2 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub084:4052710:4052803 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub040:2093690:2093690 [0] NCCL INFO comm 0xba9dc4d0 rank 28 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub014:1495254:1495254 [0] NCCL INFO comm 0x50fe0a80 rank 12 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub084:4052709:4052802 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub084:4052708:4052804 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub084:4052711:4052801 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub096:1645784:1645784 [0] NCCL INFO comm 0xcdcc14f0 rank 52 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub014:1495256:1495256 [2] NCCL INFO comm 0x9f383a90 rank 14 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub005:2408152:2408152 [1] NCCL INFO comm 0x50e7e140 rank 1 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub040:2093692:2093692 [2] NCCL INFO comm 0x514bd130 rank 30 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub013:1694054:1694054 [1] NCCL INFO comm 0x5088d590 rank 9 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub095:2520062:2520062 [3] NCCL INFO comm 0x8c7104c0 rank 51 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub095:2520060:2520060 [1] NCCL INFO comm 0xb4653490 rank 49 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE 
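The NCCL "Abort COMPLETE" records in this stretch are the normal end-of-run teardown of the 64-rank communicator; each rank should emit exactly one such record. A short sketch (assuming only the line format visible in this log) that flags ranks whose abort never completed:

import re
import sys

# One "Abort COMPLETE" record is expected per rank at teardown, e.g.
# "... NCCL INFO comm 0x513374c0 rank 18 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE"
ABORT = re.compile(r"rank (\d+) nranks (\d+) .* - Abort COMPLETE")

def missing_ranks(log_path):
    done, nranks = set(), 0
    with open(log_path) as f:
        for line in f:
            m = ABORT.search(line)
            if m:
                done.add(int(m.group(1)))
                nranks = int(m.group(2))
    return sorted(set(range(nranks)) - done)

if __name__ == "__main__":
    # Usage (hypothetical file name): python check_aborts.py train.1.log
    print(missing_ranks(sys.argv[1]))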
+gpub040:2093691:2093691 [1] NCCL INFO comm 0xb9336880 rank 29 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub014:1495257:1495257 [3] NCCL INFO comm 0x946a450 rank 15 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub097:1705868:1705868 [0] NCCL INFO comm 0x4f565ad0 rank 56 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub040:2093693:2093693 [3] NCCL INFO comm 0xbd6eac10 rank 31 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub014:1495255:1495255 [1] NCCL INFO comm 0x515d3c50 rank 13 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub084:4052708:4052708 [0] NCCL INFO comm 0xb576c9d0 rank 44 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub084:4052710:4052710 [2] NCCL INFO comm 0x4f81fce0 rank 46 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub084:4052709:4052709 [1] NCCL INFO comm 0xd834420 rank 45 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub084:4052711:4052711 [3] NCCL INFO comm 0xa5710b50 rank 47 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub041:1527385:1527468 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub041:1527384:1527467 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub041:1527385:1527385 [2] NCCL INFO comm 0x5082e9e0 rank 34 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub041:1527383:1527470 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub041:1527386:1527469 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub041:1527386:1527386 [3] NCCL INFO comm 0x4f979490 rank 35 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub041:1527384:1527384 [1] NCCL INFO comm 0x512a04d0 rank 33 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub041:1527383:1527383 [0] NCCL INFO comm 0x5103b480 rank 32 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub067:1574057:1574141 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub067:1574055:1574142 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub067:1574056:1574140 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub067:1574057:1574057 [3] NCCL INFO comm 0x8d973650 rank 39 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub067:1574055:1574055 [1] NCCL INFO comm 0x509b90f0 rank 37 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub067:1574054:1574139 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub067:1574056:1574056 [2] NCCL INFO comm 0xb006d7d0 rank 38 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub067:1574054:1574054 [0] NCCL INFO comm 0x4f342150 rank 36 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +[gpub005:0/64] 2023-07-09 10:43:52,593 (trainer:338) INFO: 30epoch results: [train] iter_time=0.191, forward_time=0.148, loss_ctc=72.388, loss_att=55.270, acc=0.706, loss=60.405, backward_time=1.032, grad_norm=108.407, clip=100.000, loss_scale=1.547e+26, optim_step_time=0.183, optim0_lr0=6.623e-05, train_time=3.318, time=4 hours, 36 minutes and 42.57 seconds, total_count=270000, gpu_max_cached_mem_GB=38.234, [valid] loss_ctc=46.634, cer_ctc=0.267, loss_att=40.617, acc=0.667, cer=0.399, wer=0.998, loss=42.422, time=5 minutes and 56.2 seconds, total_count=27830, gpu_max_cached_mem_GB=38.234, [att_plot] time=5 minutes and 53.71 seconds, total_count=0, gpu_max_cached_mem_GB=38.234 +[gpub005:0/64] 2023-07-09 10:44:08,080 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpub005:0/64] 2023-07-09 10:44:08,120 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.acc": 
exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.acc.ave_5best.till30epoch.pth +[gpub005:0/64] 2023-07-09 10:45:00,456 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.total_count": exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.total_count.ave_5best.till30epoch.pth +[gpub005:0/64] 2023-07-09 10:45:39,702 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/22epoch.pth, exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/25epoch.pth +[gpub005:0/64] 2023-07-09 10:45:39,703 (trainer:458) INFO: The training was finished at 30 epochs +[gpub005:0/64] 2023-07-09 10:45:39,705 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.acc": exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.acc.ave_5best.pth +[gpub005:0/64] 2023-07-09 10:45:50,816 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.total_count": exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.total_count.ave_5best.pth +gpub005:2408151:2408236 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub005:2408151:2408151 [0] NCCL INFO comm 0x8dda0850 rank 0 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +# Accounting: begin_time=1688778318 +# Accounting: end_time=1688917563 +# Accounting: time=139245 threads=1 +# Finished at Sun Jul 9 10:46:03 CDT 2023 with status 0 diff --git a/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.6.log b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.6.log new file mode 100644 index 0000000000000000000000000000000000000000..8db1f95dd1757f8d117cc406623f88fa4234d53f --- /dev/null +++ b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.6.log @@ -0,0 +1,5571 @@ +# Running on gpua003.delta.ncsa.illinois.edu +# Started at Wed Jul 5 22:37:23 CDT 2023 +# SLURMD_NODENAME=gpua003 +# SLURM_CLUSTER_NAME=delta +# SLURM_CONF=/var/spool/slurmd/conf-cache/slurm.conf +# SLURM_CPUS_ON_NODE=64 +# SLURM_CPUS_PER_TASK=64 +# SLURM_EXPORT_ENV=PATH +# SLURM_GET_USER_ENV=1 +# SLURM_GPUS_ON_NODE=4 +# SLURM_GTIDS=0 +# SLURM_JOBID=2132611 +# SLURM_JOB_ACCOUNT=bbjs-delta-gpu +# SLURM_JOB_CPUS_PER_NODE='64(x16)' +# SLURM_JOB_GID=202 +# SLURM_JOB_GPUS=0,1,2,3 +# SLURM_JOB_ID=2132611 +# SLURM_JOB_NAME=exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.log +# SLURM_JOB_NODELIST='gpua[003,005,010,025,028-029,031,035,053,055,057,060,074,087,090,098]' +# SLURM_JOB_NUM_NODES=16 +# SLURM_JOB_PARTITION=gpuA100x4 +# SLURM_JOB_QOS=bbjs-delta-gpu +# SLURM_JOB_UID=68077 +# SLURM_JOB_USER=peng6 +# SLURM_LOCALID=0 +# SLURM_MEM_PER_NODE=240000 +# SLURM_NNODES=16 +# SLURM_NODEID=0 +# SLURM_NODELIST='gpua[003,005,010,025,028-029,031,035,053,055,057,060,074,087,090,098]' +# SLURM_NODE_ALIASES='(null)' +# SLURM_OPEN_MODE=a +# SLURM_PRIO_PROCESS=0 +# SLURM_PROCID=0 +# SLURM_SUBMIT_DIR=/scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1 +# SLURM_SUBMIT_HOST=dt-login02.delta.internal.ncsa.edu +# SLURM_TASKS_PER_NODE='1(x16)' +# SLURM_TASK_PID=350544 +# SLURM_TOPOLOGY_ADDR=ss00.ss05.gpua003 +# 
SLURM_TOPOLOGY_ADDR_PATTERN=switch.switch.node +# SLURM_WORKING_CLUSTER=delta:dt-sched:6817:9728:109 +# srun --export=ALL python3 -m espnet2.bin.s2t_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_409154d5-fd37-4757-b90c-3838c14071d0 +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_409154d5-fd37-4757-b90c-3838c14071d0 +[gpua003:0/64] 2023-07-05 22:40:37,448 (distributed_c10d:319) INFO: Added key: store_based_barrier_key:1 to store for rank: 0 +[gpua003:0/64] 2023-07-05 22:40:38,431 (distributed_c10d:353) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 64 nodes. +[gpua003:0/64] 2023-07-05 22:40:38,458 (s2t:483) INFO: Vocabulary size: 50002 +[gpua003:0/64] 2023-07-05 22:40:52,612 (abs_task:1201) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True +[gpua003:0/64] 2023-07-05 22:40:52,621 (abs_task:1202) INFO: Model structure: +ESPnetS2TModel( + (frontend): DefaultFrontend( + (stft): Stft(n_fft=512, win_length=400, hop_length=160, center=True, normalized=False, onesided=True) + (frontend): Frontend() + (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False) + ) + (specaug): SpecAug( + (freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq) + (time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=10, axis=time) + ) + (normalize): GlobalMVN(stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz, norm_means=True, norm_vars=True) + (encoder): TransformerEncoder( + (embed): Conv2dSubsampling( + (conv): Sequential( + (0): Conv2d(1, 1024, kernel_size=(3, 3), stride=(2, 2)) + (1): ReLU() + (2): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(2, 2)) + (3): ReLU() + ) + (out): Sequential( + (0): Linear(in_features=19456, out_features=1024, bias=True) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + ) + (encoders): MultiSequential( + (0): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout):
Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, 
out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (9): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (10): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): 
Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (11): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (12): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (13): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (14): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): 
Dropout(p=0.1, inplace=False) + ) + (15): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (16): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (17): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (18): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (19): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): 
Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (20): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (21): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (22): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (23): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() 
+ ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + ) + (decoder): TransformerDecoder( + (embed): Sequential( + (0): Embedding(50002, 1024) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (output_layer): Linear(in_features=1024, out_features=50002, bias=True) + (decoders): MultiSequential( + (0): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + 
(linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): 
Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): 
Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (9): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (10): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (11): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): 
Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (12): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (13): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, 
elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (14): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (15): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (16): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), 
eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (17): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (18): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (19): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): 
Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (20): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (21): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (22): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): 
Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (23): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + ) + (criterion_att): LabelSmoothingLoss( + (criterion): KLDivLoss() + ) + (ctc): CTC( + (ctc_lo): Linear(in_features=1024, out_features=50002, bias=True) + (ctc_loss): CTCLoss() + ) +) + +Model summary: + Class Name: ESPnetS2TModel + Total Number of model parameters: 888.51 M + Number of trainable parameters: 888.51 M (100.0%) + Size: 3.55 GB + Type: torch.float32 +[gpua003:0/64] 2023-07-05 22:40:52,621 (abs_task:1205) INFO: Optimizer: +AdamW ( +Parameter Group 0 + amsgrad: False + betas: [0.9, 0.98] + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 0.00025 + lr: 2.5e-08 + maximize: False + weight_decay: 0.0 +) +[gpua003:0/64] 2023-07-05 22:40:52,621 (abs_task:1206) INFO: Scheduler: WarmupLR(warmup_steps=10000) +[gpua003:0/64] 2023-07-05 22:40:52,630 (abs_task:1215) INFO: Saving the configuration in exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/config.yaml +[gpua003:0/64] 2023-07-05 22:40:53,329 (abs_task:1272) INFO: Loading pretrained params from /scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v2/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e18_d18_lr5e-4_warmup20k_raw_bpe50000/valid.acc.ave.pth +[gpua003:0/64] 2023-07-05 22:41:01,373 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-05 22:41:01,580 (abs_task:1570) INFO: [valid] dataset: +ESPnetDataset( + speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"} + text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"} + text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"} + text: {"path": "dump/raw/dev/text", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-05 22:41:01,580 (abs_task:1571) INFO: [valid] Batch sampler: 
UnsortedBatchSampler(N-batch=1012, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape,
+[gpua003:0/64] 2023-07-05 22:41:01,582 (abs_task:1572) INFO: [valid] mini-batch sizes summary: N-batch=1012, mean=128.1, min=128, max=129
+[gpua003:0/64] 2023-07-05 22:41:02,091 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-05 22:41:02,408 (abs_task:1570) INFO: [plot_att] dataset:
+ESPnetDataset(
+ speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"}
+ text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"}
+ text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"}
+ text: {"path": "dump/raw/dev/text", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-05 22:41:02,409 (abs_task:1571) INFO: [plot_att] Batch sampler: UnsortedBatchSampler(N-batch=129591, batch_size=1, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape,
+[gpua003:0/64] 2023-07-05 22:41:02,409 (abs_task:1572) INFO: [plot_att] mini-batch sizes summary: N-batch=3, mean=1.0, min=1, max=1
+[gpua003:0/64] 2023-07-05 22:41:33,411 (trainer:159) INFO: The training was resumed using exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/checkpoint.pth
+gpua003:350633:350633 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.3<0>
+gpua003:350633:350633 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua003:350633:350633 [0] NCCL INFO cudaDriverVersion 12010
+NCCL version 2.14.3+cuda11.7
+[gpua003:0/64] 2023-07-05 22:41:38,247 (trainer:284) INFO: 14/100epoch started
+[gpua003:0/64] 2023-07-05 22:41:38,292 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpua003:0/64] 2023-07-05 22:41:57,218 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-05 22:42:00,682 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-05 22:42:00,683 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpua003:0/64] 2023-07-05 22:42:00,689 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+gpua031:1680700:1680700 [0] NCCL INFO cudaDriverVersion 12010
+gpua031:1680700:1680700 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.31<0>
+gpua031:1680700:1680700 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua031:1680700:1680773 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.31<0>
+gpua031:1680700:1680773 [0] NCCL INFO Using network IB
+gpua031:1680700:1680773 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua031:1680700:1680773 [0] NCCL INFO Trees [0] 25/28/-1->24->16 [1] 25/-1/-1->24->21
+gpua031:1680700:1680773 [0] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0
+gpua031:1680700:1680773 [0] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0
+gpua031:1680700:1680773 [0] NCCL INFO Channel 00/0 : 24[7000] -> 25[46000] via P2P/IPC/read
+gpua031:1680700:1680773 [0] NCCL INFO Channel 01/0 : 24[7000] -> 25[46000] via P2P/IPC/read
+gpua031:1680700:1680773 [0] NCCL INFO Connected all rings
+gpua031:1680700:1680773 [0] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [receive] via NET/IB/0
+gpua031:1680700:1680773 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [send] via NET/IB/0
+gpua031:1680700:1680773 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [receive] via NET/IB/0
+gpua031:1680700:1680773 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [send] via NET/IB/0
+gpua031:1680700:1680773 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [receive] via NET/IB/0
+gpua031:1680700:1680773 [0] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [send] via NET/IB/0
+gpua031:1680700:1680773 [0] NCCL INFO Connected all trees
+gpua031:1680700:1680773 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua031:1680700:1680773 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua031:1680700:1680773 [0] NCCL INFO comm 0xb9862e50 rank 24 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua031:1680703:1680703 [3] NCCL INFO cudaDriverVersion 12010
+gpua031:1680703:1680703 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.31<0>
+gpua031:1680703:1680703 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua031:1680703:1680772 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.31<0>
+gpua031:1680703:1680772 [3] NCCL INFO Using network IB
+gpua031:1680703:1680772 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua031:1680703:1680772 [3] NCCL INFO Trees [0] -1/-1/-1->27->26 [1] -1/-1/-1->27->26
+gpua031:1680703:1680772 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0
+gpua031:1680703:1680772 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0
+gpua031:1680703:1680772 [3] NCCL INFO Connected all rings
+gpua031:1680703:1680772 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 26[85000] via P2P/IPC/read
+gpua031:1680703:1680772 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 26[85000] via P2P/IPC/read
+gpua031:1680703:1680772 [3] NCCL INFO Connected all trees
+gpua031:1680703:1680772 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua031:1680703:1680772 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua031:1680703:1680772 [3] NCCL INFO comm 0x5195fe00 rank 27 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua031:1680702:1680702 [2] NCCL INFO cudaDriverVersion 12010
+gpua031:1680702:1680702 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.31<0>
+gpua031:1680702:1680702 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua031:1680702:1680774 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.31<0>
+gpua031:1680702:1680774 [2] NCCL INFO Using network IB
+gpua031:1680702:1680774 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua031:1680702:1680774 [2] NCCL INFO Trees [0] 27/-1/-1->26->25 [1] 27/-1/-1->26->25
+gpua031:1680702:1680774 [2] NCCL INFO Channel 00/0 : 26[85000] -> 27[c7000] via P2P/IPC/read
+gpua031:1680702:1680774 [2] NCCL INFO Channel 01/0 : 26[85000] -> 27[c7000] via P2P/IPC/read
+gpua031:1680702:1680774 [2] NCCL INFO Connected all rings
+gpua031:1680702:1680774 [2] NCCL INFO Channel 00/0 : 26[85000] -> 25[46000] via P2P/IPC/read
+gpua031:1680702:1680774 [2] NCCL INFO Channel 01/0 : 26[85000] -> 25[46000] via P2P/IPC/read
+gpua031:1680702:1680774 [2] NCCL INFO Connected all trees
+gpua031:1680702:1680774 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua031:1680702:1680774 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua031:1680702:1680774 [2] NCCL INFO comm 0x90042a50 rank 26 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua031:1680701:1680701 [1] NCCL INFO cudaDriverVersion 12010
+gpua031:1680701:1680701 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.31<0>
+gpua031:1680701:1680701 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua031:1680701:1680775 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.31<0>
+gpua031:1680701:1680775 [1] NCCL INFO Using network IB
+gpua031:1680701:1680775 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua031:1680701:1680775 [1] NCCL INFO Trees [0] 26/20/-1->25->24 [1] 26/-1/-1->25->24
+gpua031:1680701:1680775 [1] NCCL INFO Channel 00/0 : 25[46000] -> 26[85000] via P2P/IPC/read
+gpua031:1680701:1680775 [1] NCCL INFO Channel 01/0 : 25[46000] -> 26[85000] via P2P/IPC/read
+gpua031:1680701:1680775 [1] NCCL INFO Connected all rings
+gpua031:1680701:1680775 [1] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [receive] via NET/IB/0
+gpua031:1680701:1680775 [1] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [send] via NET/IB/0
+gpua031:1680701:1680775 [1] NCCL INFO Channel 00/0 : 25[46000] -> 24[7000] via P2P/IPC/read
+gpua031:1680701:1680775 [1] NCCL INFO Channel 01/0 : 25[46000] -> 24[7000] via P2P/IPC/read
+gpua031:1680701:1680775 [1] NCCL INFO Connected all trees
+gpua031:1680701:1680775 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua031:1680701:1680775 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua031:1680701:1680775 [1] NCCL INFO comm 0xb74170b0 rank 25 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua029:1226924:1226924 [3] NCCL INFO cudaDriverVersion 12010
+gpua029:1226924:1226924 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.29<0>
+gpua029:1226924:1226924 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua029:1226924:1226999 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.29<0>
+gpua029:1226924:1226999 [3] NCCL INFO Using network IB
+gpua029:1226924:1226999 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua029:1226924:1226999 [3] NCCL INFO Trees [0] -1/-1/-1->23->22 [1] -1/-1/-1->23->22
+gpua029:1226924:1226999 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0
+gpua029:1226924:1226999 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0
+gpua029:1226924:1226999 [3] NCCL INFO Connected all rings
+gpua029:1226924:1226999 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 22[85000] via P2P/IPC/read
+gpua029:1226924:1226999 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 22[85000] via P2P/IPC/read
+gpua029:1226924:1226999 [3] NCCL INFO Connected all trees
+gpua029:1226924:1226999 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua029:1226924:1226999 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua029:1226924:1226999 [3] NCCL INFO comm 0x502a1280 rank 23 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua029:1226921:1226921 [0] NCCL INFO cudaDriverVersion 12010
+gpua029:1226921:1226921 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.29<0>
+gpua029:1226921:1226921 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua029:1226921:1226997 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.29<0>
+gpua029:1226921:1226997 [0] NCCL INFO Using network IB
+gpua029:1226921:1226997 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua029:1226921:1226997 [0] NCCL INFO Trees [0] 21/-1/-1->20->25 [1] 21/16/-1->20->13
+gpua029:1226921:1226997 [0] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0
+gpua029:1226921:1226997 [0] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0
+gpua029:1226921:1226997 [0] NCCL INFO Channel 00/0 : 20[7000] -> 21[46000] via P2P/IPC/read
+gpua029:1226921:1226997 [0] NCCL INFO Channel 01/0 : 20[7000] -> 21[46000] via P2P/IPC/read
+gpua029:1226921:1226997 [0] NCCL INFO Connected all rings
+gpua029:1226921:1226997 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [receive] via NET/IB/0
+gpua029:1226921:1226997 [0] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [send] via NET/IB/0
+gpua029:1226921:1226997 [0] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [receive] via NET/IB/0
+gpua029:1226921:1226997 [0] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [send] via NET/IB/0
+gpua029:1226921:1226997 [0] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [receive] via NET/IB/0
+gpua029:1226921:1226997 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [send] via NET/IB/0
+gpua029:1226921:1226997 [0] NCCL INFO Connected all trees
+gpua029:1226921:1226997 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua029:1226921:1226997 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua029:1226921:1226997 [0] NCCL INFO comm 0x8dcadfd0 rank 20 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua074:989792:989792 [1] NCCL INFO cudaDriverVersion 12010
+gpua074:989792:989792 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.74<0>
+gpua074:989792:989792 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua074:989792:989862 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.74<0>
+gpua074:989792:989862 [1] NCCL INFO Using network IB
+gpua074:989792:989862 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua074:989792:989862 [1] NCCL INFO Trees [0] 50/40/-1->49->48 [1] 50/-1/-1->49->48
+gpua074:989792:989862 [1] NCCL INFO Channel 00/0 : 49[46000] -> 50[85000] via P2P/IPC/read
+gpua074:989792:989862 [1] NCCL INFO Channel 01/0 : 49[46000] -> 50[85000] via P2P/IPC/read
+gpua074:989792:989862 [1] NCCL INFO Connected all rings
+gpua074:989792:989862 [1] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [receive] via NET/IB/0
+gpua074:989792:989862 [1] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [send] via NET/IB/0
+gpua074:989792:989862 [1] NCCL INFO Channel 00/0 : 49[46000] -> 48[7000] via P2P/IPC/read
+gpua074:989792:989862 [1] NCCL INFO Channel 01/0 : 49[46000] -> 48[7000] via P2P/IPC/read
+gpua074:989792:989862 [1] NCCL INFO Connected all trees
+gpua074:989792:989862 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua074:989792:989862 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua074:989792:989862 [1] NCCL INFO comm 0x91b8e50 rank 49 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua074:989794:989794 [3] NCCL INFO cudaDriverVersion 12010
+gpua074:989794:989794 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.74<0>
+gpua074:989794:989794 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua074:989794:989863 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.74<0>
+gpua074:989794:989863 [3] NCCL INFO Using network IB
+gpua074:989794:989863 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua074:989794:989863 [3] NCCL INFO Trees [0] -1/-1/-1->51->50 [1] -1/-1/-1->51->50
+gpua074:989794:989863 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0
+gpua074:989794:989863 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0
+gpua074:989794:989863 [3] NCCL INFO Connected all rings
+gpua074:989794:989863 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 50[85000] via P2P/IPC/read
+gpua074:989794:989863 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 50[85000] via P2P/IPC/read
+gpua074:989794:989863 [3] NCCL INFO Connected all trees
+gpua074:989794:989863 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua074:989794:989863 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua074:989794:989863 [3] NCCL INFO comm 0x51823d90 rank 51 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua074:989793:989793 [2] NCCL INFO cudaDriverVersion 12010
+gpua074:989793:989793 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.74<0>
+gpua074:989793:989793 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua074:989793:989861 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.74<0>
+gpua074:989793:989861 [2] NCCL INFO Using network IB
+gpua074:989793:989861 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua074:989793:989861 [2] NCCL INFO Trees [0] 51/-1/-1->50->49 [1] 51/-1/-1->50->49
+gpua074:989793:989861 [2] NCCL INFO Channel 00/0 : 50[85000] -> 51[c7000] via P2P/IPC/read
+gpua074:989793:989861 [2] NCCL INFO Channel 01/0 : 50[85000] -> 51[c7000] via P2P/IPC/read
+gpua074:989793:989861 [2] NCCL INFO Connected all rings
+gpua074:989793:989861 [2] NCCL INFO Channel 00/0 : 50[85000] -> 49[46000] via P2P/IPC/read
+gpua074:989793:989861 [2] NCCL INFO Channel 01/0 : 50[85000] -> 49[46000] via P2P/IPC/read
+gpua074:989793:989861 [2] NCCL INFO Connected all trees
+gpua074:989793:989861 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua074:989793:989861 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua074:989793:989861 [2] NCCL INFO comm 0x50124340 rank 50 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua029:1226922:1226922 [1] NCCL INFO cudaDriverVersion 12010
+gpua029:1226922:1226922 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.29<0>
+gpua029:1226922:1226922 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua029:1226922:1226996 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.29<0>
+gpua029:1226922:1226996 [1] NCCL INFO Using network IB
+gpua029:1226922:1226996 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua029:1226922:1226996 [1] NCCL INFO Trees [0] 22/-1/-1->21->20 [1] 22/24/-1->21->20
+gpua029:1226922:1226996 [1] NCCL INFO Channel 00/0 : 21[46000] -> 22[85000] via P2P/IPC/read
+gpua029:1226922:1226996 [1] NCCL INFO Channel 01/0 : 21[46000] -> 22[85000] via P2P/IPC/read
+gpua029:1226922:1226996 [1] NCCL INFO Connected all rings
+gpua029:1226922:1226996 [1] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [send] via NET/IB/0
+gpua029:1226922:1226996 [1] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [receive] via NET/IB/0
+gpua029:1226922:1226996 [1] NCCL INFO Channel 00/0 : 21[46000] -> 20[7000] via P2P/IPC/read
+gpua029:1226922:1226996 [1] NCCL INFO Channel 01/0 : 21[46000] -> 20[7000] via P2P/IPC/read
+gpua029:1226922:1226996 [1] NCCL INFO Connected all trees
+gpua029:1226922:1226996 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua029:1226922:1226996 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua029:1226922:1226996 [1] NCCL INFO comm 0x91446d0 rank 21 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua087:2330953:2330953 [0] NCCL INFO cudaDriverVersion 12010
+gpua087:2330953:2330953 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.87<0>
+gpua087:2330953:2330953 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua087:2330953:2331026 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.87<0>
+gpua087:2330953:2331026 [0] NCCL INFO Using network IB
+gpua087:2330953:2331026 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua087:2330953:2331026 [0] NCCL INFO Trees [0] 53/-1/-1->52->57 [1] 53/48/-1->52->45
+gpua087:2330953:2331026 [0] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0
+gpua087:2330953:2331026 [0] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0
+gpua087:2330953:2331026 [0] NCCL INFO Channel 00/0 : 52[7000] -> 53[46000] via P2P/IPC/read
+gpua087:2330953:2331026 [0] NCCL INFO Channel 01/0 : 52[7000] -> 53[46000] via P2P/IPC/read
+gpua087:2330953:2331026 [0] NCCL INFO Connected all rings
+gpua087:2330953:2331026 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [receive] via NET/IB/0
+gpua087:2330953:2331026 [0] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [send] via NET/IB/0
+gpua087:2330953:2331026 [0] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [receive] via NET/IB/0
+gpua087:2330953:2331026 [0] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [send] via NET/IB/0
+gpua087:2330953:2331026 [0] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [receive] via NET/IB/0
+gpua087:2330953:2331026 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [send] via NET/IB/0
+gpua087:2330953:2331026 [0] NCCL INFO Connected all trees
+gpua087:2330953:2331026 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua087:2330953:2331026 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua087:2330953:2331026 [0] NCCL INFO comm 0x8805010 rank 52 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua087:2330955:2330955 [2] NCCL INFO cudaDriverVersion 12010
+gpua087:2330955:2330955 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.87<0>
+gpua087:2330955:2330955 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua087:2330955:2331028 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.87<0>
+gpua087:2330955:2331028 [2] NCCL INFO Using network IB
+gpua087:2330955:2331028 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua087:2330955:2331028 [2] NCCL INFO Trees [0] 55/-1/-1->54->53 [1] 55/-1/-1->54->53
+gpua087:2330955:2331028 [2] NCCL INFO Channel 00/0 : 54[85000] -> 55[c7000] via P2P/IPC/read
+gpua087:2330955:2331028 [2] NCCL INFO Channel 01/0 : 54[85000] -> 55[c7000] via P2P/IPC/read
+gpua087:2330955:2331028 [2] NCCL INFO Connected all rings
+gpua087:2330955:2331028 [2] NCCL INFO Channel 00/0 : 54[85000] -> 53[46000] via P2P/IPC/read
+gpua087:2330955:2331028 [2] NCCL INFO Channel 01/0 : 54[85000] -> 53[46000] via P2P/IPC/read
+gpua087:2330955:2331028 [2] NCCL INFO Connected all trees
+gpua087:2330955:2331028 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua087:2330955:2331028 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua087:2330955:2331028 [2] NCCL INFO comm 0x1091ecd0 rank 54 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua029:1226923:1226923 [2] NCCL INFO cudaDriverVersion 12010
+gpua029:1226923:1226923 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.29<0>
+gpua029:1226923:1226923 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua029:1226923:1226998 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.29<0>
+gpua029:1226923:1226998 [2] NCCL INFO Using network IB
+gpua029:1226923:1226998 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua029:1226923:1226998 [2] NCCL INFO Trees [0] 23/-1/-1->22->21 [1] 23/-1/-1->22->21
+gpua029:1226923:1226998 [2] NCCL INFO Channel 00/0 : 22[85000] -> 23[c7000] via P2P/IPC/read
+gpua029:1226923:1226998 [2] NCCL INFO Channel 01/0 : 22[85000] -> 23[c7000] via P2P/IPC/read
+gpua029:1226923:1226998 [2] NCCL INFO Connected all rings
+gpua029:1226923:1226998 [2] NCCL INFO Channel 00/0 : 22[85000] -> 21[46000] via P2P/IPC/read
+gpua029:1226923:1226998 [2] NCCL INFO Channel 01/0 : 22[85000] -> 21[46000] via P2P/IPC/read
+gpua029:1226923:1226998 [2] NCCL INFO Connected all trees
+gpua029:1226923:1226998 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua029:1226923:1226998 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua029:1226923:1226998 [2] NCCL INFO comm 0x9682050 rank 22 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua057:1814426:1814426 [1] NCCL INFO cudaDriverVersion 12010
+gpua057:1814426:1814426 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.57<0>
+gpua057:1814426:1814426 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua057:1814426:1814504 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.57<0>
+gpua057:1814426:1814504 [1] NCCL INFO Using network IB
+gpua057:1814426:1814504 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua057:1814426:1814504 [1] NCCL INFO Trees [0] 42/36/-1->41->40 [1] 42/-1/-1->41->40
+gpua057:1814426:1814504 [1] NCCL INFO Channel 00/0 : 41[46000] -> 42[85000] via P2P/IPC/read
+gpua057:1814426:1814504 [1] NCCL INFO Channel 01/0 : 41[46000] -> 42[85000] via P2P/IPC/read
+gpua057:1814426:1814504 [1] NCCL INFO Connected all rings
+gpua057:1814426:1814504 [1] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [receive] via NET/IB/0
+gpua057:1814426:1814504 [1] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [send] via NET/IB/0
+gpua057:1814426:1814504 [1] NCCL INFO Channel 00/0 : 41[46000] -> 40[7000] via P2P/IPC/read
+gpua057:1814426:1814504 [1] NCCL INFO Channel 01/0 : 41[46000] -> 40[7000] via P2P/IPC/read
+gpua057:1814426:1814504 [1] NCCL INFO Connected all trees
+gpua057:1814426:1814504 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua057:1814426:1814504 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua057:1814426:1814504 [1] NCCL INFO comm 0xb6887810 rank 41 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua074:989791:989791 [0] NCCL INFO cudaDriverVersion 12010
+gpua074:989791:989791 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.74<0>
+gpua074:989791:989791 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua074:989791:989864 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.74<0>
+gpua074:989791:989864 [0] NCCL INFO Using network IB
+gpua074:989791:989864 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua074:989791:989864 [0] NCCL INFO Trees [0] 49/56/-1->48->32 [1] 49/-1/-1->48->52
+gpua074:989791:989864 [0] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0
+gpua074:989791:989864 [0] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0
+gpua074:989791:989864 [0] NCCL INFO Channel 00/0 : 48[7000] -> 49[46000] via P2P/IPC/read
+gpua074:989791:989864 [0] NCCL INFO Channel 01/0 : 48[7000] -> 49[46000] via P2P/IPC/read
+gpua074:989791:989864 [0] NCCL INFO Connected all rings
+gpua057:1814428:1814428 [3] NCCL INFO cudaDriverVersion 12010
+gpua057:1814428:1814428 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.57<0>
+gpua057:1814428:1814428 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua057:1814428:1814503 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.57<0>
+gpua057:1814428:1814503 [3] NCCL INFO Using network IB
+gpua057:1814428:1814503 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua057:1814428:1814503 [3] NCCL INFO Trees [0] -1/-1/-1->43->42 [1] -1/-1/-1->43->42
+gpua057:1814428:1814503 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0
+gpua057:1814428:1814503 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0
+gpua057:1814428:1814503 [3] NCCL INFO Connected all rings
+gpua057:1814428:1814503 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 42[85000] via P2P/IPC/read
+gpua057:1814428:1814503 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 42[85000] via P2P/IPC/read
+gpua074:989791:989864 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [send] via NET/IB/0
+gpua074:989791:989864 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [send] via NET/IB/0
+gpua074:989791:989864 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [receive] via NET/IB/0
+gpua074:989791:989864 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [send] via NET/IB/0
+gpua074:989791:989864 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [receive] via NET/IB/0
+gpua074:989791:989864 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [receive] via NET/IB/0
+gpua074:989791:989864 [0] NCCL INFO Connected all trees
+gpua074:989791:989864 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua074:989791:989864 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua074:989791:989864 [0] NCCL INFO comm 0x4f541ea0 rank 48 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua057:1814428:1814503 [3] NCCL INFO Connected all trees
+gpua057:1814428:1814503 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua057:1814428:1814503 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua057:1814428:1814503 [3] NCCL INFO comm 0xa830f510 rank 43 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua057:1814427:1814427 [2] NCCL INFO cudaDriverVersion 12010
+gpua057:1814427:1814427 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.57<0>
+gpua057:1814427:1814427 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua057:1814427:1814505 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.57<0>
+gpua057:1814427:1814505 [2] NCCL INFO Using network IB
+gpua057:1814427:1814505 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua057:1814427:1814505 [2] NCCL INFO Trees [0] 43/-1/-1->42->41 [1] 43/-1/-1->42->41
+gpua057:1814427:1814505 [2] NCCL INFO Channel 00/0 : 42[85000] -> 43[c7000] via P2P/IPC/read
+gpua057:1814427:1814505 [2] NCCL INFO Channel 01/0 : 42[85000] -> 43[c7000] via P2P/IPC/read
+gpua057:1814427:1814505 [2] NCCL INFO Connected all rings
+gpua057:1814427:1814505 [2] NCCL INFO Channel 00/0 : 42[85000] -> 41[46000] via P2P/IPC/read
+gpua057:1814427:1814505 [2] NCCL INFO Channel 01/0 : 42[85000] -> 41[46000] via P2P/IPC/read
+gpua057:1814427:1814505 [2] NCCL INFO Connected all trees
+gpua057:1814427:1814505 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua057:1814427:1814505 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua057:1814427:1814505 [2] NCCL INFO comm 0x8ff8bf0 rank 42 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua087:2330956:2330956 [3] NCCL INFO cudaDriverVersion 12010
+gpua087:2330956:2330956 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.87<0>
+gpua087:2330956:2330956 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua087:2330956:2331027 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.87<0>
+gpua087:2330956:2331027 [3] NCCL INFO Using network IB
+gpua087:2330956:2331027 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua087:2330956:2331027 [3] NCCL INFO Trees [0] -1/-1/-1->55->54 [1] -1/-1/-1->55->54
+gpua087:2330956:2331027 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0
+gpua087:2330956:2331027 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0
+gpua087:2330956:2331027 [3] NCCL INFO Connected all rings
+gpua087:2330956:2331027 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 54[85000] via P2P/IPC/read
+gpua087:2330956:2331027 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 54[85000] via P2P/IPC/read
+gpua057:1814425:1814425 [0] NCCL INFO cudaDriverVersion 12010
+gpua057:1814425:1814425 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.57<0>
+gpua057:1814425:1814425 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua057:1814425:1814506 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.57<0>
+gpua057:1814425:1814506 [0] NCCL INFO Using network IB
+gpua057:1814425:1814506 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua057:1814425:1814506 [0] NCCL INFO Trees [0] 41/44/-1->40->49 [1] 41/-1/-1->40->37
+gpua057:1814425:1814506 [0] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0
+gpua057:1814425:1814506 [0] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0
+gpua057:1814425:1814506 [0] NCCL INFO Channel 00/0 : 40[7000] -> 41[46000] via P2P/IPC/read
+gpua057:1814425:1814506 [0] NCCL INFO Channel 01/0 : 40[7000] -> 41[46000] via P2P/IPC/read
+gpua057:1814425:1814506 [0] NCCL INFO Connected all rings
+gpua087:2330956:2331027 [3] NCCL INFO Connected all trees
+gpua087:2330956:2331027 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua087:2330956:2331027 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua087:2330956:2331027 [3] NCCL INFO comm 0x4fa40250 rank 55 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua057:1814425:1814506 [0] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [receive] via NET/IB/0
+gpua057:1814425:1814506 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [send] via NET/IB/0
+gpua057:1814425:1814506 [0] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [send] via NET/IB/0
+gpua057:1814425:1814506 [0] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [receive] via NET/IB/0
+gpua057:1814425:1814506 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [receive] via NET/IB/0
+gpua057:1814425:1814506 [0] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [send] via NET/IB/0
+gpua057:1814425:1814506 [0] NCCL INFO Connected all trees
+gpua057:1814425:1814506 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua057:1814425:1814506 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua057:1814425:1814506 [0] NCCL INFO comm 0xc0b1e520 rank 40 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua087:2330954:2330954 [1] NCCL INFO cudaDriverVersion 12010
+gpua087:2330954:2330954 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.87<0>
+gpua087:2330954:2330954 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua087:2330954:2331029 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.87<0>
+gpua087:2330954:2331029 [1] NCCL INFO Using network IB
+gpua087:2330954:2331029 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua087:2330954:2331029 [1] NCCL INFO Trees [0] 54/-1/-1->53->52 [1] 54/56/-1->53->52
+gpua087:2330954:2331029 [1] NCCL INFO Channel 00/0 : 53[46000] -> 54[85000] via P2P/IPC/read
+gpua087:2330954:2331029 [1] NCCL INFO Channel 01/0 : 53[46000] -> 54[85000] via P2P/IPC/read
+gpua087:2330954:2331029 [1] NCCL INFO Connected all rings
+gpua087:2330954:2331029 [1] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [send] via NET/IB/0
+gpua087:2330954:2331029 [1] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [receive] via NET/IB/0
+gpua087:2330954:2331029 [1] NCCL INFO Channel 00/0 : 53[46000] -> 52[7000] via P2P/IPC/read
+gpua087:2330954:2331029 [1] NCCL INFO Channel 01/0 : 53[46000] -> 52[7000] via P2P/IPC/read
+gpua087:2330954:2331029 [1] NCCL INFO Connected all trees
+gpua087:2330954:2331029 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua087:2330954:2331029 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua087:2330954:2331029 [1] NCCL INFO comm 0xbc380f30 rank 53 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua028:3269324:3269324 [3] NCCL INFO cudaDriverVersion 12010
+gpua028:3269324:3269324 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.28<0>
+gpua028:3269324:3269324 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua028:3269324:3269401 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.28<0>
+gpua028:3269324:3269401 [3] NCCL INFO Using network IB
+gpua028:3269324:3269401 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua028:3269324:3269401 [3] NCCL INFO Trees [0] -1/-1/-1->19->18 [1] -1/-1/-1->19->18
+gpua028:3269324:3269401 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0
+gpua028:3269324:3269401 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0
+gpua028:3269324:3269401 [3] NCCL INFO Connected all rings
+gpua028:3269324:3269401 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 18[85000] via P2P/IPC/read
+gpua028:3269324:3269401 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 18[85000] via P2P/IPC/read
+gpua028:3269324:3269401 [3] NCCL INFO Connected all trees
+gpua028:3269324:3269401 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua028:3269324:3269401 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua028:3269324:3269401 [3] NCCL INFO comm 0x50758ff0 rank 19 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua053:959076:959076 [2] NCCL INFO cudaDriverVersion 12010
+gpua053:959076:959076 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.53<0>
+gpua053:959076:959076 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua053:959076:959150 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.53<0>
+gpua053:959076:959150 [2] NCCL INFO Using network IB
+gpua053:959076:959150 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua053:959076:959150 [2] NCCL INFO Trees [0] 35/-1/-1->34->33 [1] 35/-1/-1->34->33
+gpua053:959076:959150 [2] NCCL INFO Channel 00/0 : 34[85000] -> 35[c7000] via P2P/IPC/read
+gpua053:959076:959150 [2] NCCL INFO Channel 01/0 : 34[85000] -> 35[c7000] via P2P/IPC/read
+gpua053:959076:959150 [2] NCCL INFO Connected all rings
+gpua053:959076:959150 [2] NCCL INFO Channel 00/0 : 34[85000] -> 33[46000] via P2P/IPC/read
+gpua053:959076:959150 [2] NCCL INFO Channel 01/0 : 34[85000] -> 33[46000] via P2P/IPC/read
+gpua053:959076:959150 [2] NCCL INFO Connected all trees
+gpua053:959076:959150 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua053:959076:959150 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua053:959076:959150 [2] NCCL INFO comm 0xa5547430 rank 34 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua028:3269322:3269322 [1] NCCL INFO cudaDriverVersion 12010
+gpua028:3269322:3269322 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.28<0>
+gpua028:3269322:3269322 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua028:3269322:3269404 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.28<0>
+gpua028:3269322:3269404 [1] NCCL INFO Using network IB
+gpua028:3269322:3269404 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua028:3269322:3269404 [1] NCCL INFO Trees [0] 18/8/-1->17->16 [1] 18/-1/-1->17->16
+gpua028:3269322:3269404 [1] NCCL INFO Channel 00/0 : 17[46000] -> 18[85000] via P2P/IPC/read
+gpua028:3269322:3269404 [1] NCCL INFO Channel 01/0 : 17[46000] -> 18[85000] via P2P/IPC/read
+gpua028:3269322:3269404 [1] NCCL INFO Connected all rings
+gpua028:3269322:3269404 [1] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [receive] via NET/IB/0
+gpua028:3269322:3269404 [1] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [send] via NET/IB/0
+gpua028:3269322:3269404 [1] NCCL INFO Channel 00/0 : 17[46000] -> 16[7000] via P2P/IPC/read
+gpua028:3269322:3269404 [1] NCCL INFO Channel 01/0 : 17[46000] -> 16[7000] via P2P/IPC/read
+gpua028:3269322:3269404 [1] NCCL INFO Connected all trees
+gpua028:3269322:3269404 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua028:3269322:3269404 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua028:3269322:3269404 [1] NCCL INFO comm 0x50ff9ba0 rank 17 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua053:959074:959074 [0] NCCL INFO cudaDriverVersion 12010
+gpua053:959074:959074 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.53<0>
+gpua053:959074:959074 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua053:959074:959149 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.53<0>
+gpua053:959074:959149 [0] NCCL INFO Using network IB
+gpua053:959074:959149 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua053:959074:959149 [0] NCCL INFO Trees [0] 33/48/-1->32->0 [1] 33/-1/-1->32->36
+gpua053:959074:959149 [0] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0
+gpua053:959074:959149 [0] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0
+gpua053:959074:959149 [0] NCCL INFO Channel 00/0 : 32[7000] -> 33[46000] via P2P/IPC/read
+gpua053:959074:959149 [0] NCCL INFO Channel 01/0 : 32[7000] -> 33[46000] via P2P/IPC/read
+gpua053:959074:959149 [0] NCCL INFO Connected all rings
+gpua053:959074:959149 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [send] via NET/IB/0
+gpua053:959074:959149 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [send] via NET/IB/0
+gpua053:959074:959149 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [receive] via NET/IB/0
+gpua053:959074:959149 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [send] via NET/IB/0
+gpua053:959074:959149 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [receive] via NET/IB/0
+gpua053:959074:959149 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [receive] via NET/IB/0
+gpua053:959074:959149 [0] NCCL INFO Connected all trees
+gpua053:959074:959149 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua053:959074:959149 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua053:959074:959149 [0] NCCL INFO comm 0x50589df0 rank 32 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua028:3269321:3269321 [0] NCCL INFO cudaDriverVersion 12010
+gpua028:3269321:3269321 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.28<0>
+gpua028:3269321:3269321 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua028:3269321:3269403 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.28<0>
+gpua028:3269321:3269403 [0] NCCL INFO Using network IB
+gpua028:3269321:3269403 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua028:3269321:3269403 [0] NCCL INFO Trees [0] 17/24/-1->16->33 [1] 17/-1/-1->16->20
+gpua028:3269321:3269403 [0] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0
+gpua028:3269321:3269403 [0] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0
+gpua028:3269321:3269403 [0] NCCL INFO Channel 00/0 : 16[7000] -> 17[46000] via P2P/IPC/read
+gpua028:3269321:3269403 [0] NCCL INFO Channel 01/0 : 16[7000] -> 17[46000] via P2P/IPC/read
+gpua028:3269321:3269403 [0] NCCL INFO Connected all rings
+gpua028:3269321:3269403 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [send] via NET/IB/0
+gpua028:3269321:3269403 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [send] via NET/IB/0
+gpua028:3269321:3269403 [0] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [send] via NET/IB/0
+gpua028:3269321:3269403 [0] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [receive] via NET/IB/0
+gpua028:3269321:3269403 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [receive] via NET/IB/0
+gpua028:3269321:3269403 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [receive] via NET/IB/0
+gpua028:3269321:3269403 [0] NCCL INFO Connected all trees
+gpua028:3269321:3269403 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua028:3269321:3269403 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua028:3269321:3269403 [0] NCCL INFO comm 0xc37df860 rank 16 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua028:3269323:3269323 [2] NCCL INFO cudaDriverVersion 12010
+gpua028:3269323:3269323 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.28<0>
+gpua028:3269323:3269323 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua028:3269323:3269402 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.28<0>
+gpua028:3269323:3269402 [2] NCCL INFO Using network IB
+gpua028:3269323:3269402 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua028:3269323:3269402 [2] NCCL INFO Trees [0] 19/-1/-1->18->17 [1] 19/-1/-1->18->17
+gpua028:3269323:3269402 [2] NCCL INFO Channel 00/0 : 18[85000] -> 19[c7000] via P2P/IPC/read
+gpua028:3269323:3269402 [2] NCCL INFO Channel 01/0 : 18[85000] -> 19[c7000] via P2P/IPC/read
+gpua028:3269323:3269402 [2] NCCL INFO Connected all rings
+gpua028:3269323:3269402 [2] NCCL INFO Channel 00/0 : 18[85000] -> 17[46000] via P2P/IPC/read
+gpua028:3269323:3269402 [2] NCCL INFO Channel 01/0 : 18[85000] -> 17[46000] via P2P/IPC/read
+gpua028:3269323:3269402 [2] NCCL INFO Connected all trees
+gpua028:3269323:3269402 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua028:3269323:3269402 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua028:3269323:3269402 [2] NCCL INFO comm 0x4fe1d010 rank 18 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua053:959077:959077 [3] NCCL INFO cudaDriverVersion 12010
+gpua053:959077:959077 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.53<0>
+gpua053:959077:959077 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua053:959077:959151 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.53<0>
+gpua053:959077:959151 [3] NCCL INFO Using network IB
+gpua053:959077:959151 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua053:959077:959151 [3] NCCL INFO Trees [0] -1/-1/-1->35->34 [1] -1/-1/-1->35->34
+gpua053:959077:959151 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0
+gpua053:959077:959151 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0
+gpua053:959077:959151 [3] NCCL INFO Connected all rings
+gpua053:959077:959151 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 34[85000] via P2P/IPC/read
+gpua053:959077:959151 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 34[85000] via P2P/IPC/read
+gpua053:959077:959151 [3] NCCL INFO Connected all trees
+gpua053:959077:959151 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua053:959077:959151 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua053:959077:959151 [3] NCCL INFO comm 0x8f7ecf20 rank 35 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua053:959075:959075 [1] NCCL INFO cudaDriverVersion 12010
+gpua053:959075:959075 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.53<0>
+gpua053:959075:959075 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua053:959075:959152 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.53<0>
+gpua053:959075:959152 [1] NCCL INFO Using network IB
+gpua053:959075:959152 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua053:959075:959152 [1] NCCL INFO Trees [0] 34/16/-1->33->32 [1] 34/-1/-1->33->32
+gpua053:959075:959152 [1] NCCL INFO Channel 00/0 : 33[46000] -> 34[85000] via P2P/IPC/read
+gpua053:959075:959152 [1] NCCL INFO Channel 01/0 : 33[46000] -> 34[85000] via P2P/IPC/read
+gpua053:959075:959152 [1] NCCL INFO Connected all rings
+gpua053:959075:959152 [1] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [receive] via NET/IB/0
+gpua053:959075:959152 [1] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [send] via NET/IB/0
+gpua053:959075:959152 [1] NCCL INFO Channel 00/0 : 33[46000] -> 32[7000] via P2P/IPC/read
+gpua053:959075:959152 [1] NCCL INFO Channel 01/0 : 33[46000] -> 32[7000] via P2P/IPC/read
+gpua053:959075:959152 [1] NCCL INFO Connected all trees
+gpua053:959075:959152 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua053:959075:959152 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua053:959075:959152 [1] NCCL INFO comm 0x50f9bf70 rank 33 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua055:3866106:3866106 [3] NCCL INFO cudaDriverVersion 12010
+gpua055:3866106:3866106 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.55<0>
+gpua055:3866106:3866106 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua055:3866106:3866180 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.55<0>
+gpua055:3866106:3866180 [3] NCCL INFO Using network IB
+gpua055:3866106:3866180 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua055:3866106:3866180 [3] NCCL INFO Trees [0] -1/-1/-1->39->38 [1] -1/-1/-1->39->38
+gpua055:3866106:3866180 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0
+gpua055:3866106:3866180 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0
+gpua055:3866106:3866180 [3] NCCL INFO Connected all rings
+gpua055:3866106:3866180 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 38[85000] via P2P/IPC/read
+gpua055:3866106:3866180 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 38[85000] via P2P/IPC/read
+gpua055:3866106:3866180 [3] NCCL INFO Connected all trees
+gpua055:3866106:3866180 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua055:3866106:3866180 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua055:3866106:3866180 [3] NCCL INFO comm 0xb731bb50 rank 39 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua055:3866104:3866104 [1] NCCL INFO cudaDriverVersion 12010
+gpua055:3866104:3866104 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.55<0>
+gpua055:3866104:3866104 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua055:3866104:3866182 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.55<0>
+gpua055:3866104:3866182 [1] NCCL INFO Using network IB
+gpua055:3866104:3866182 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua055:3866104:3866182 [1] NCCL INFO Trees [0] 38/-1/-1->37->36 [1] 38/40/-1->37->36
+gpua055:3866104:3866182 [1] NCCL INFO Channel 00/0 : 37[46000] -> 38[85000] via P2P/IPC/read
+gpua055:3866104:3866182 [1] NCCL INFO Channel 01/0 : 37[46000] -> 38[85000] via P2P/IPC/read
+gpua055:3866104:3866182 [1] NCCL INFO Connected all rings
+gpua055:3866104:3866182 [1] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [send] via NET/IB/0
+gpua055:3866104:3866182 [1] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [receive] via NET/IB/0
+gpua055:3866104:3866182 [1] NCCL INFO Channel 00/0 : 37[46000] -> 36[7000] via P2P/IPC/read
+gpua055:3866104:3866182 [1] NCCL INFO Channel 01/0 : 37[46000] -> 36[7000] via P2P/IPC/read
+gpua055:3866104:3866182 [1] NCCL INFO Connected all trees
+gpua055:3866104:3866182 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua055:3866104:3866182 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua055:3866104:3866182 [1] NCCL INFO comm 0x4ff24650 rank 37 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua055:3866103:3866103 [0] NCCL INFO cudaDriverVersion 12010
+gpua055:3866103:3866103 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.55<0>
+gpua055:3866103:3866103 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua055:3866103:3866183 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.55<0>
+gpua055:3866103:3866183 [0] NCCL INFO Using network IB
+gpua055:3866103:3866183 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua055:3866103:3866183 [0] NCCL INFO Trees [0] 37/-1/-1->36->41 [1] 37/32/-1->36->44
+gpua055:3866103:3866183 [0] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0
+gpua055:3866103:3866183 [0] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0
+gpua055:3866103:3866183 [0] NCCL INFO Channel 00/0 : 36[7000] -> 37[46000] via P2P/IPC/read
+gpua055:3866103:3866183 [0] NCCL INFO Channel 01/0 : 36[7000] -> 37[46000] via P2P/IPC/read
+gpua055:3866103:3866183 [0] NCCL INFO Connected all rings
+gpua055:3866103:3866183 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [receive] via NET/IB/0
+gpua055:3866103:3866183 [0] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [send] via NET/IB/0
+gpua055:3866103:3866183 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [send] via NET/IB/0
+gpua055:3866103:3866183 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [receive] via NET/IB/0
+gpua055:3866103:3866183 [0] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [receive] via NET/IB/0
+gpua055:3866103:3866183 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [send] via NET/IB/0
+gpua055:3866103:3866183 [0] NCCL INFO Connected all trees
+gpua055:3866103:3866183 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua055:3866103:3866183 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua055:3866103:3866183 [0] NCCL INFO comm 0x8783410 rank 36 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua055:3866105:3866105 [2] NCCL INFO cudaDriverVersion 12010
+gpua055:3866105:3866105 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.55<0>
+gpua055:3866105:3866105 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua055:3866105:3866181 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.55<0>
+gpua055:3866105:3866181 [2] NCCL INFO Using network IB
+gpua055:3866105:3866181 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua055:3866105:3866181 [2] NCCL INFO Trees [0] 39/-1/-1->38->37 [1] 39/-1/-1->38->37
+gpua055:3866105:3866181 [2] NCCL INFO Channel 00/0 : 38[85000] -> 39[c7000] via P2P/IPC/read
+gpua055:3866105:3866181 [2] NCCL INFO Channel 01/0 : 38[85000] -> 39[c7000] via P2P/IPC/read
+gpua055:3866105:3866181 [2] NCCL INFO Connected all rings
+gpua055:3866105:3866181 [2] NCCL INFO Channel 00/0 : 38[85000] -> 37[46000] via P2P/IPC/read
+gpua055:3866105:3866181 [2] NCCL INFO Channel 01/0 : 38[85000] -> 37[46000] via P2P/IPC/read
+gpua055:3866105:3866181 [2] NCCL INFO Connected all trees
+gpua055:3866105:3866181 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua055:3866105:3866181 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua055:3866105:3866181 [2] NCCL INFO comm 0xa0bacc0 rank 38 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua098:2101209:2101209 [1] NCCL INFO cudaDriverVersion 12010
+gpua098:2101209:2101209 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.98<0>
+gpua098:2101209:2101209 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua098:2101209:2101288 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.98<0>
+gpua098:2101209:2101288 [1] NCCL INFO Using network IB
+gpua098:2101209:2101288 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua098:2101209:2101288 [1] NCCL INFO Trees [0] 62/-1/-1->61->60 [1] 62/-1/-1->61->60
+gpua098:2101209:2101288 [1] NCCL INFO Channel 00/0 : 61[46000] -> 62[85000] via P2P/IPC/read
+gpua098:2101209:2101288 [1] NCCL INFO Channel 01/0 : 61[46000] -> 62[85000] via P2P/IPC/read
+gpua098:2101209:2101288 [1] NCCL INFO Connected all rings
+gpua098:2101209:2101288 [1] NCCL INFO Channel 00/0 : 61[46000] -> 60[7000] via P2P/IPC/read
+gpua098:2101209:2101288 [1] NCCL INFO Channel 01/0 : 61[46000] -> 60[7000] via P2P/IPC/read
+gpua098:2101209:2101288 [1] NCCL INFO Connected all trees
+gpua098:2101209:2101288 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua098:2101209:2101288 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua098:2101209:2101288 [1] NCCL INFO comm 0xb77452f0 rank 61 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua098:2101208:2101208 [0] NCCL INFO cudaDriverVersion 12010
+gpua098:2101208:2101208 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.98<0>
+gpua098:2101208:2101208 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua098:2101208:2101291 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.98<0>
+gpua098:2101208:2101291 [0] NCCL INFO Using network IB
+gpua098:2101208:2101291 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua098:2101208:2101291 [0] NCCL INFO Trees [0] 61/-1/-1->60->56 [1] 61/28/-1->60->-1
+gpua098:2101208:2101291 [0] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0
+gpua098:2101208:2101291 [0] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0
+gpua098:2101208:2101291 [0] NCCL INFO Channel 00/0 : 60[7000] -> 61[46000] via P2P/IPC/read
+gpua098:2101208:2101291 [0] NCCL INFO Channel 01/0 : 60[7000] -> 61[46000] via P2P/IPC/read
+gpua098:2101208:2101291 [0] NCCL INFO Connected all rings
+gpua098:2101208:2101291 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [receive] via NET/IB/0
+gpua098:2101208:2101291 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [receive] via NET/IB/0
+gpua098:2101208:2101291 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [send] via NET/IB/0
+gpua098:2101208:2101291 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [send] via NET/IB/0
+gpua098:2101208:2101291 [0] NCCL INFO Connected all trees
+gpua098:2101208:2101291 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua098:2101208:2101291 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua098:2101208:2101291 [0] NCCL INFO comm 0x8ba9dc20 rank 60 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua098:2101210:2101210 [2] NCCL INFO cudaDriverVersion 12010
+gpua098:2101210:2101210 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.98<0>
+gpua098:2101210:2101210 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua098:2101210:2101290 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.98<0>
+gpua098:2101210:2101290 [2] NCCL INFO Using network IB
+gpua098:2101210:2101290 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua098:2101210:2101290 [2] NCCL INFO Trees [0] 63/-1/-1->62->61 [1] 63/-1/-1->62->61
+gpua098:2101210:2101290 [2] NCCL INFO Channel 00/0 : 62[85000] -> 63[c7000] via P2P/IPC/read
+gpua098:2101210:2101290 [2] NCCL INFO Channel 01/0 : 62[85000] -> 63[c7000] via P2P/IPC/read
+gpua098:2101210:2101290 [2] NCCL INFO Connected all rings
+gpua098:2101210:2101290 [2] NCCL INFO Channel 00/0 : 62[85000] -> 61[46000] via P2P/IPC/read
+gpua098:2101210:2101290 [2] NCCL INFO Channel 01/0 : 62[85000] -> 61[46000] via P2P/IPC/read
+gpua098:2101210:2101290 [2] NCCL INFO Connected all trees
+gpua098:2101210:2101290 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua098:2101210:2101290 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua098:2101210:2101290 [2] NCCL INFO comm 0xb13e4b0 rank 62 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua090:2294100:2294100 [3] NCCL INFO cudaDriverVersion 12010
+gpua090:2294100:2294100 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.90<0>
+gpua090:2294100:2294100 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua090:2294100:2294189 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.90<0>
+gpua090:2294100:2294189 [3] NCCL INFO Using network IB
+gpua090:2294100:2294189 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua090:2294100:2294189 [3] NCCL INFO Trees [0] -1/-1/-1->59->58 [1] -1/-1/-1->59->58
+gpua090:2294100:2294189 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0
+gpua090:2294100:2294189 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0
+gpua090:2294100:2294189 [3] NCCL INFO Connected all rings
+gpua090:2294100:2294189 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 58[85000] via P2P/IPC/read
+gpua090:2294100:2294189 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 58[85000] via P2P/IPC/read
+gpua090:2294100:2294189 [3] NCCL INFO Connected all trees
+gpua090:2294100:2294189 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua090:2294100:2294189 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua090:2294100:2294189 [3] NCCL INFO comm 0x8d2a2250 rank 59 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua090:2294099:2294099 [2] NCCL INFO cudaDriverVersion 12010
+gpua090:2294099:2294099 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.90<0>
+gpua090:2294099:2294099 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua090:2294099:2294186 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.90<0>
+gpua090:2294099:2294186 [2] NCCL INFO Using network IB
+gpua090:2294099:2294186 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua090:2294099:2294186 [2] NCCL INFO Trees [0] 59/-1/-1->58->57 [1] 59/-1/-1->58->57
+gpua090:2294099:2294186 [2] NCCL INFO Channel 00/0 : 58[85000] -> 59[c7000] via P2P/IPC/read
+gpua090:2294099:2294186 [2] NCCL INFO Channel 01/0 : 58[85000] -> 59[c7000] via P2P/IPC/read
+gpua090:2294099:2294186 [2] NCCL INFO Connected all rings
+gpua090:2294099:2294186 [2] NCCL INFO Channel 00/0 : 58[85000] -> 57[46000] via P2P/IPC/read
+gpua090:2294099:2294186 [2] NCCL INFO Channel 01/0 : 58[85000] -> 57[46000] via P2P/IPC/read
+gpua090:2294099:2294186 [2] NCCL INFO Connected all trees
+gpua090:2294099:2294186 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua090:2294099:2294186 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua090:2294099:2294186 [2] NCCL INFO comm 0x508070c0 rank 58 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua003:350634:350634 [1] NCCL INFO cudaDriverVersion 12010
+gpua003:350634:350634 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.3<0>
+gpua003:350634:350634 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua003:350634:350707 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.3<0>
+gpua003:350634:350707 [1] NCCL INFO Using network IB
+gpua003:350634:350707 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua003:350634:350707 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0
+gpua003:350634:350707 [1] NCCL INFO Channel 00/0 : 1[46000] -> 2[85000] via P2P/IPC/read
+gpua003:350634:350707 [1] NCCL INFO Channel 01/0 : 1[46000] -> 2[85000] via P2P/IPC/read
+gpua003:350634:350707 [1] NCCL INFO Connected all rings
+gpua003:350634:350707 [1] NCCL INFO Channel 00/0 : 1[46000] -> 0[7000] via P2P/IPC/read
+gpua003:350634:350707 [1] NCCL INFO Channel 01/0 : 1[46000] -> 0[7000] via P2P/IPC/read
+gpua003:350634:350707 [1] NCCL INFO Connected all trees
+gpua003:350634:350707 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua003:350634:350707 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua003:350634:350707 [1] NCCL INFO comm 0xb8217e10 rank 1 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua090:2294098:2294098 [1] NCCL INFO cudaDriverVersion 12010
+gpua090:2294098:2294098 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.90<0>
+gpua090:2294098:2294098 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua090:2294098:2294187 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.90<0>
+gpua090:2294098:2294187 [1] NCCL INFO Using network IB
+gpua090:2294098:2294187 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpua090:2294098:2294187 [1] NCCL INFO Trees [0] 58/52/-1->57->56 [1] 58/-1/-1->57->56
+gpua090:2294098:2294187 [1] NCCL INFO Channel 00/0 : 57[46000] -> 58[85000] via P2P/IPC/read
+gpua090:2294098:2294187 [1] NCCL INFO Channel 01/0 : 57[46000] -> 58[85000] via P2P/IPC/read
+gpua090:2294098:2294187 [1] NCCL INFO Connected all rings
+gpua090:2294098:2294187 [1] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [receive] via NET/IB/0
+gpua090:2294098:2294187 [1] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [send] via NET/IB/0
+gpua090:2294098:2294187 [1] NCCL INFO Channel 00/0 : 57[46000] -> 56[7000] via P2P/IPC/read
+gpua090:2294098:2294187 [1] NCCL INFO Channel 01/0 : 57[46000] -> 56[7000] via P2P/IPC/read
+gpua090:2294098:2294187 [1] NCCL INFO Connected all trees
+gpua090:2294098:2294187 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua090:2294098:2294187 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua090:2294098:2294187 [1] NCCL INFO comm 0xb9291470 rank 57 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpua098:2101211:2101211 [3] NCCL INFO cudaDriverVersion 12010
+gpua098:2101211:2101211 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.98<0>
+gpua098:2101211:2101211 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua098:2101211:2101289 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.98<0>
+gpua098:2101211:2101289 [3] NCCL INFO Using network IB
+gpua098:2101211:2101289 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua098:2101211:2101289 [3] NCCL INFO Trees [0] -1/-1/-1->63->62 [1] -1/-1/-1->63->62
+gpua098:2101211:2101289 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0
+gpua098:2101211:2101289 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0
+gpua098:2101211:2101289 [3] NCCL INFO Connected all rings
+gpua098:2101211:2101289 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 62[85000] via P2P/IPC/read
+gpua098:2101211:2101289 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 62[85000] via P2P/IPC/read
+gpua098:2101211:2101289 [3] NCCL INFO Connected all trees
+gpua098:2101211:2101289 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua098:2101211:2101289 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua098:2101211:2101289 [3] NCCL INFO comm 0xb9e844a0 rank 63 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua003:350636:350636 [3] NCCL INFO cudaDriverVersion 12010
+gpua003:350636:350636 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.3<0>
+gpua003:350636:350636 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua003:350636:350708 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.3<0>
+gpua003:350636:350708 [3] NCCL INFO Using network IB
+gpua003:350636:350708 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua003:350636:350708 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2
+gpua003:350636:350708 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0
+gpua003:350636:350708 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0
+gpua003:350636:350708 [3] NCCL INFO Connected all rings
+gpua003:350636:350708 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 2[85000] via P2P/IPC/read
+gpua003:350636:350708 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 2[85000] via P2P/IPC/read
+gpua003:350636:350708 [3] NCCL INFO Connected all trees
+gpua003:350636:350708 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua003:350636:350708 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua003:350636:350708 [3] NCCL INFO comm 0x8b901f80 rank 3 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua010:1622002:1622002 [2] NCCL INFO cudaDriverVersion 12010
+gpua010:1622002:1622002 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.10<0>
+gpua010:1622002:1622002 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua010:1622002:1622073 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.10<0>
+gpua010:1622002:1622073 [2] NCCL INFO Using network IB
+gpua010:1622002:1622073 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua010:1622002:1622073 [2] NCCL INFO Trees [0] 11/-1/-1->10->9 [1] 11/-1/-1->10->9
+gpua010:1622002:1622073 [2] NCCL INFO Channel 00/0 : 10[85000] -> 11[c7000] via P2P/IPC/read
+gpua010:1622002:1622073 [2] NCCL INFO Channel 01/0 : 10[85000] -> 11[c7000] via P2P/IPC/read
+gpua010:1622002:1622073 [2] NCCL INFO Connected all rings
+gpua010:1622002:1622073 [2] NCCL INFO Channel 00/0 : 10[85000] -> 9[46000] via P2P/IPC/read
+gpua010:1622002:1622073 [2] NCCL INFO Channel 01/0 : 10[85000] -> 9[46000] via P2P/IPC/read
+gpua010:1622002:1622073 [2] NCCL INFO Connected all trees
+gpua010:1622002:1622073 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua010:1622002:1622073 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua010:1622002:1622073 [2] NCCL INFO comm 0x95597d0 rank 10 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua003:350633:350706 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.3<0>
+gpua003:350633:350706 [0] NCCL INFO Using network IB
+gpua003:350633:350706 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua003:350633:350706 [0] NCCL INFO Channel 00/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
+gpua003:350633:350706 [0] NCCL INFO Channel 01/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
+gpua003:350633:350706 [0] NCCL INFO Trees [0] 1/32/-1->0->-1 [1] 1/-1/-1->0->4
+gpua003:350633:350706 [0] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0
+gpua003:350633:350706 [0] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0
+gpua003:350633:350706 [0] NCCL INFO Channel 00/0 : 0[7000] -> 1[46000] via P2P/IPC/read
+gpua003:350633:350706 [0] NCCL INFO Channel 01/0 : 0[7000] -> 1[46000] via P2P/IPC/read
+gpua003:350633:350706 [0] NCCL INFO Connected all rings
+gpua003:350633:350706 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [send] via NET/IB/0
+gpua003:350633:350706 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [receive] via NET/IB/0
+gpua003:350633:350706 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [send] via NET/IB/0
+gpua003:350633:350706 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [receive] via NET/IB/0
+gpua003:350633:350706 [0] NCCL INFO Connected all trees
+gpua003:350633:350706 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua003:350633:350706 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua003:350633:350706 [0] NCCL INFO comm 0x505c0d10 rank 0 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua003:350635:350635 [2] NCCL INFO cudaDriverVersion 12010
+gpua003:350635:350635 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.3<0>
+gpua003:350635:350635 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua003:350635:350709 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.3<0>
+gpua003:350635:350709 [2] NCCL INFO Using network IB
+gpua003:350635:350709 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpua003:350635:350709 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1
+gpua003:350635:350709 [2] NCCL INFO Channel 00/0 : 2[85000] -> 3[c7000] via P2P/IPC/read
+gpua003:350635:350709 [2] NCCL INFO Channel 01/0 : 2[85000] -> 3[c7000] via P2P/IPC/read
+gpua003:350635:350709 [2] NCCL INFO Connected all rings
+gpua003:350635:350709 [2] NCCL INFO Channel 00/0 : 2[85000] -> 1[46000] via P2P/IPC/read
+gpua003:350635:350709 [2] NCCL INFO Channel 01/0 : 2[85000] -> 1[46000] via P2P/IPC/read
+gpua003:350635:350709 [2] NCCL INFO Connected all trees
+gpua003:350635:350709 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua003:350635:350709 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua003:350635:350709 [2] NCCL INFO comm 0xc165ff50 rank 2 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpua090:2294097:2294097 [0] NCCL INFO cudaDriverVersion 12010
+gpua090:2294097:2294097 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.90<0>
+gpua090:2294097:2294097 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua090:2294097:2294188 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.90<0>
+gpua090:2294097:2294188 [0] NCCL INFO Using network IB
+gpua090:2294097:2294188 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua090:2294097:2294188 [0] NCCL INFO Trees [0] 57/60/-1->56->48 [1] 57/-1/-1->56->53
+gpua090:2294097:2294188 [0] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0
+gpua090:2294097:2294188 [0] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0
+gpua090:2294097:2294188 [0] NCCL INFO Channel 00/0 : 56[7000] -> 57[46000] via P2P/IPC/read
+gpua090:2294097:2294188 [0] NCCL INFO Channel 01/0 : 56[7000] -> 57[46000] via P2P/IPC/read
+gpua090:2294097:2294188 [0] NCCL INFO Connected all rings
+gpua090:2294097:2294188 [0] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [receive] via NET/IB/0
+gpua090:2294097:2294188 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [send] via NET/IB/0
+gpua090:2294097:2294188 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [receive] via NET/IB/0
+gpua090:2294097:2294188 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [send] via NET/IB/0
+gpua090:2294097:2294188 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [receive] via NET/IB/0
+gpua090:2294097:2294188 [0] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [send] via NET/IB/0
+gpua090:2294097:2294188 [0] NCCL INFO Connected all trees
+gpua090:2294097:2294188 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua090:2294097:2294188 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua090:2294097:2294188 [0] NCCL INFO comm 0x4ed27c50 rank 56 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpua010:1622003:1622003 [3] NCCL INFO cudaDriverVersion 12010
+gpua010:1622003:1622003 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.10<0>
+gpua010:1622003:1622003 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua010:1622003:1622076 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.10<0>
+gpua010:1622003:1622076 [3] NCCL INFO Using network IB
+gpua010:1622003:1622076 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpua010:1622003:1622076 [3] NCCL INFO Trees [0] -1/-1/-1->11->10 [1] -1/-1/-1->11->10
+gpua010:1622003:1622076 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0
+gpua010:1622003:1622076 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0
+gpua010:1622003:1622076 [3] NCCL INFO Connected all rings
+gpua010:1622003:1622076 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 10[85000] via P2P/IPC/read
+gpua010:1622003:1622076 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 10[85000] via P2P/IPC/read
+gpua010:1622003:1622076 [3] NCCL INFO Connected all trees
+gpua010:1622003:1622076 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpua010:1622003:1622076 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpua010:1622003:1622076 [3] NCCL INFO comm 0x9c22310 rank 11 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpua010:1622000:1622000 [0] NCCL INFO cudaDriverVersion 12010
+gpua010:1622000:1622000 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.10<0>
+gpua010:1622000:1622000 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpua010:1622000:1622074 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.10<0>
+gpua010:1622000:1622074 [0] NCCL INFO Using network IB
+gpua010:1622000:1622074 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpua010:1622000:1622074 [0] NCCL INFO Trees [0] 9/12/-1->8->17 [1] 9/-1/-1->8->5
+gpua010:1622000:1622074 [0] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0
+gpua010:1622000:1622074 [0] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0
+gpua010:1622000:1622074 [0] NCCL INFO Channel 00/0 : 8[7000] -> 9[46000] via P2P/IPC/read
+gpua010:1622000:1622074 [0] NCCL INFO Channel 01/0 : 8[7000] -> 9[46000] via P2P/IPC/read
+gpua010:1622000:1622074 [0] NCCL INFO Connected all rings
+gpua010:1622000:1622074 [0] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [receive] via NET/IB/0
+gpua010:1622000:1622074 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [send] via NET/IB/0
+gpua010:1622000:1622074 [0] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [send] via NET/IB/0
+gpua010:1622000:1622074 [0] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [receive] via NET/IB/0
+gpua010:1622000:1622074 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [receive] via NET/IB/0
+gpua010:1622000:1622074 [0] NCCL INFO Channel
01/0 : 8[7000] -> 5[46000] [send] via NET/IB/0 +gpua010:1622000:1622074 [0] NCCL INFO Connected all trees +gpua010:1622000:1622074 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua010:1622000:1622074 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua010:1622000:1622074 [0] NCCL INFO comm 0xc2d78fd0 rank 8 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua010:1622001:1622001 [1] NCCL INFO cudaDriverVersion 12010 +gpua010:1622001:1622001 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.10<0> +gpua010:1622001:1622001 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua010:1622001:1622075 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.10<0> +gpua010:1622001:1622075 [1] NCCL INFO Using network IB +gpua010:1622001:1622075 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua010:1622001:1622075 [1] NCCL INFO Trees [0] 10/4/-1->9->8 [1] 10/-1/-1->9->8 +gpua010:1622001:1622075 [1] NCCL INFO Channel 00/0 : 9[46000] -> 10[85000] via P2P/IPC/read +gpua010:1622001:1622075 [1] NCCL INFO Channel 01/0 : 9[46000] -> 10[85000] via P2P/IPC/read +gpua010:1622001:1622075 [1] NCCL INFO Connected all rings +gpua010:1622001:1622075 [1] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [receive] via NET/IB/0 +gpua010:1622001:1622075 [1] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [send] via NET/IB/0 +gpua010:1622001:1622075 [1] NCCL INFO Channel 00/0 : 9[46000] -> 8[7000] via P2P/IPC/read +gpua010:1622001:1622075 [1] NCCL INFO Channel 01/0 : 9[46000] -> 8[7000] via P2P/IPC/read +gpua010:1622001:1622075 [1] NCCL INFO Connected all trees +gpua010:1622001:1622075 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua010:1622001:1622075 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua010:1622001:1622075 [1] NCCL INFO comm 0x8e6a9490 rank 9 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua060:2854971:2854971 [3] NCCL INFO cudaDriverVersion 12010 +gpua060:2854971:2854971 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.60<0> +gpua060:2854971:2854971 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua060:2854971:2855041 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.60<0> +gpua060:2854971:2855041 [3] NCCL INFO Using network IB +gpua060:2854971:2855041 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua060:2854971:2855041 [3] NCCL INFO Trees [0] -1/-1/-1->47->46 [1] -1/-1/-1->47->46 +gpua060:2854971:2855041 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0 +gpua060:2854971:2855041 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0 +gpua060:2854971:2855041 [3] NCCL INFO Connected all rings +gpua060:2854971:2855041 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 46[85000] via P2P/IPC/read +gpua060:2854971:2855041 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 46[85000] via P2P/IPC/read +gpua025:63838:63838 [2] NCCL INFO cudaDriverVersion 12010 +gpua025:63838:63838 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.25<0> +gpua025:63838:63838 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua025:63838:63912 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.25<0> +gpua025:63838:63912 [2] NCCL INFO Using network IB +gpua025:63838:63912 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua025:63838:63912 [2] NCCL INFO Trees [0] 15/-1/-1->14->13 [1] 15/-1/-1->14->13 +gpua025:63838:63912 [2] NCCL INFO 
Channel 00/0 : 14[85000] -> 15[c7000] via P2P/IPC/read +gpua025:63838:63912 [2] NCCL INFO Channel 01/0 : 14[85000] -> 15[c7000] via P2P/IPC/read +gpua025:63838:63912 [2] NCCL INFO Connected all rings +gpua025:63838:63912 [2] NCCL INFO Channel 00/0 : 14[85000] -> 13[46000] via P2P/IPC/read +gpua025:63838:63912 [2] NCCL INFO Channel 01/0 : 14[85000] -> 13[46000] via P2P/IPC/read +gpua025:63838:63912 [2] NCCL INFO Connected all trees +gpua060:2854971:2855041 [3] NCCL INFO Connected all trees +gpua060:2854971:2855041 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua060:2854971:2855041 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua060:2854971:2855041 [3] NCCL INFO comm 0xb6f9a6a0 rank 47 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua025:63838:63912 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua025:63838:63912 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua025:63838:63912 [2] NCCL INFO comm 0xc1f876b0 rank 14 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua025:63837:63837 [1] NCCL INFO cudaDriverVersion 12010 +gpua025:63837:63837 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.25<0> +gpua025:63837:63837 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua025:63837:63913 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.25<0> +gpua025:63837:63913 [1] NCCL INFO Using network IB +gpua025:63837:63913 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua025:63837:63913 [1] NCCL INFO Trees [0] 14/-1/-1->13->12 [1] 14/20/-1->13->12 +gpua025:63837:63913 [1] NCCL INFO Channel 00/0 : 13[46000] -> 14[85000] via P2P/IPC/read +gpua025:63837:63913 [1] NCCL INFO Channel 01/0 : 13[46000] -> 14[85000] via P2P/IPC/read +gpua025:63837:63913 [1] NCCL INFO Connected all rings +gpua025:63837:63913 [1] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [send] via NET/IB/0 +gpua025:63837:63913 [1] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [receive] via NET/IB/0 +gpua025:63837:63913 [1] NCCL INFO Channel 00/0 : 13[46000] -> 12[7000] via P2P/IPC/read +gpua025:63837:63913 [1] NCCL INFO Channel 01/0 : 13[46000] -> 12[7000] via P2P/IPC/read +gpua025:63837:63913 [1] NCCL INFO Connected all trees +gpua025:63837:63913 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua025:63837:63913 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua025:63837:63913 [1] NCCL INFO comm 0xa196ac90 rank 13 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua025:63839:63839 [3] NCCL INFO cudaDriverVersion 12010 +gpua025:63839:63839 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.25<0> +gpua025:63839:63839 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua025:63839:63914 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.25<0> +gpua025:63839:63914 [3] NCCL INFO Using network IB +gpua025:63839:63914 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua025:63839:63914 [3] NCCL INFO Trees [0] -1/-1/-1->15->14 [1] -1/-1/-1->15->14 +gpua025:63839:63914 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0 +gpua025:63839:63914 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0 +gpua025:63839:63914 [3] NCCL INFO Connected all rings +gpua025:63839:63914 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 14[85000] via P2P/IPC/read +gpua025:63839:63914 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 14[85000] via P2P/IPC/read +gpua025:63839:63914 [3] 
NCCL INFO Connected all trees +gpua025:63839:63914 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua025:63839:63914 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua025:63839:63914 [3] NCCL INFO comm 0xc1e534d0 rank 15 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua060:2854968:2854968 [0] NCCL INFO cudaDriverVersion 12010 +gpua060:2854968:2854968 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.60<0> +gpua060:2854968:2854968 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua060:2854968:2855043 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.60<0> +gpua060:2854968:2855043 [0] NCCL INFO Using network IB +gpua060:2854968:2855043 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua060:2854968:2855043 [0] NCCL INFO Trees [0] 45/-1/-1->44->40 [1] 45/36/-1->44->29 +gpua060:2854968:2855043 [0] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0 +gpua060:2854968:2855043 [0] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0 +gpua060:2854968:2855043 [0] NCCL INFO Channel 00/0 : 44[7000] -> 45[46000] via P2P/IPC/read +gpua060:2854968:2855043 [0] NCCL INFO Channel 01/0 : 44[7000] -> 45[46000] via P2P/IPC/read +gpua060:2854968:2855043 [0] NCCL INFO Connected all rings +gpua060:2854968:2855043 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [receive] via NET/IB/0 +gpua060:2854968:2855043 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [receive] via NET/IB/0 +gpua060:2854968:2855043 [0] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [receive] via NET/IB/0 +gpua060:2854968:2855043 [0] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [send] via NET/IB/0 +gpua060:2854968:2855043 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [send] via NET/IB/0 +gpua060:2854968:2855043 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [send] via NET/IB/0 +gpua060:2854968:2855043 [0] NCCL INFO Connected all trees +gpua060:2854968:2855043 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua060:2854968:2855043 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua060:2854968:2855043 [0] NCCL INFO comm 0x9da77350 rank 44 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua060:2854970:2854970 [2] NCCL INFO cudaDriverVersion 12010 +gpua060:2854970:2854970 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.60<0> +gpua060:2854970:2854970 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua060:2854970:2855044 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.60<0> +gpua060:2854970:2855044 [2] NCCL INFO Using network IB +gpua060:2854970:2855044 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua060:2854970:2855044 [2] NCCL INFO Trees [0] 47/-1/-1->46->45 [1] 47/-1/-1->46->45 +gpua060:2854970:2855044 [2] NCCL INFO Channel 00/0 : 46[85000] -> 47[c7000] via P2P/IPC/read +gpua060:2854970:2855044 [2] NCCL INFO Channel 01/0 : 46[85000] -> 47[c7000] via P2P/IPC/read +gpua060:2854970:2855044 [2] NCCL INFO Connected all rings +gpua060:2854970:2855044 [2] NCCL INFO Channel 00/0 : 46[85000] -> 45[46000] via P2P/IPC/read +gpua060:2854970:2855044 [2] NCCL INFO Channel 01/0 : 46[85000] -> 45[46000] via P2P/IPC/read +gpua060:2854970:2855044 [2] NCCL INFO Connected all trees +gpua060:2854970:2855044 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua060:2854970:2855044 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer 
+gpua060:2854970:2855044 [2] NCCL INFO comm 0xb4b68d30 rank 46 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua025:63836:63836 [0] NCCL INFO cudaDriverVersion 12010 +gpua025:63836:63836 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.25<0> +gpua025:63836:63836 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua025:63836:63915 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.25<0> +gpua025:63836:63915 [0] NCCL INFO Using network IB +gpua025:63836:63915 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua025:63836:63915 [0] NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->28 +gpua025:63836:63915 [0] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0 +gpua025:63836:63915 [0] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0 +gpua025:63836:63915 [0] NCCL INFO Channel 00/0 : 12[7000] -> 13[46000] via P2P/IPC/read +gpua025:63836:63915 [0] NCCL INFO Channel 01/0 : 12[7000] -> 13[46000] via P2P/IPC/read +gpua025:63836:63915 [0] NCCL INFO Connected all rings +gpua025:63836:63915 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [receive] via NET/IB/0 +gpua025:63836:63915 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [receive] via NET/IB/0 +gpua025:63836:63915 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [send] via NET/IB/0 +gpua025:63836:63915 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [receive] via NET/IB/0 +gpua025:63836:63915 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [send] via NET/IB/0 +gpua025:63836:63915 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [send] via NET/IB/0 +gpua025:63836:63915 [0] NCCL INFO Connected all trees +gpua025:63836:63915 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua025:63836:63915 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua025:63836:63915 [0] NCCL INFO comm 0x1772ec20 rank 12 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua060:2854969:2854969 [1] NCCL INFO cudaDriverVersion 12010 +gpua060:2854969:2854969 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.60<0> +gpua060:2854969:2854969 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua060:2854969:2855042 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.60<0> +gpua060:2854969:2855042 [1] NCCL INFO Using network IB +gpua060:2854969:2855042 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua060:2854969:2855042 [1] NCCL INFO Trees [0] 46/-1/-1->45->44 [1] 46/52/-1->45->44 +gpua060:2854969:2855042 [1] NCCL INFO Channel 00/0 : 45[46000] -> 46[85000] via P2P/IPC/read +gpua060:2854969:2855042 [1] NCCL INFO Channel 01/0 : 45[46000] -> 46[85000] via P2P/IPC/read +gpua060:2854969:2855042 [1] NCCL INFO Connected all rings +gpua060:2854969:2855042 [1] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [send] via NET/IB/0 +gpua060:2854969:2855042 [1] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [receive] via NET/IB/0 +gpua060:2854969:2855042 [1] NCCL INFO Channel 00/0 : 45[46000] -> 44[7000] via P2P/IPC/read +gpua060:2854969:2855042 [1] NCCL INFO Channel 01/0 : 45[46000] -> 44[7000] via P2P/IPC/read +gpua060:2854969:2855042 [1] NCCL INFO Connected all trees +gpua060:2854969:2855042 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua060:2854969:2855042 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua060:2854969:2855042 [1] NCCL INFO comm 0x8c2cb6d0 rank 45 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE 
+gpua005:322787:322787 [2] NCCL INFO cudaDriverVersion 12010 +gpua005:322787:322787 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.5<0> +gpua005:322787:322787 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua005:322787:322863 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.5<0> +gpua005:322787:322863 [2] NCCL INFO Using network IB +gpua005:322787:322863 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua005:322787:322863 [2] NCCL INFO Trees [0] 7/-1/-1->6->5 [1] 7/-1/-1->6->5 +gpua005:322787:322863 [2] NCCL INFO Channel 00/0 : 6[85000] -> 7[c7000] via P2P/IPC/read +gpua005:322787:322863 [2] NCCL INFO Channel 01/0 : 6[85000] -> 7[c7000] via P2P/IPC/read +gpua005:322787:322863 [2] NCCL INFO Connected all rings +gpua005:322787:322863 [2] NCCL INFO Channel 00/0 : 6[85000] -> 5[46000] via P2P/IPC/read +gpua005:322787:322863 [2] NCCL INFO Channel 01/0 : 6[85000] -> 5[46000] via P2P/IPC/read +gpua005:322787:322863 [2] NCCL INFO Connected all trees +gpua005:322787:322863 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua005:322787:322863 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua005:322787:322863 [2] NCCL INFO comm 0xa671d450 rank 6 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua005:322788:322788 [3] NCCL INFO cudaDriverVersion 12010 +gpua005:322788:322788 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.5<0> +gpua005:322788:322788 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua005:322788:322860 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.5<0> +gpua005:322788:322860 [3] NCCL INFO Using network IB +gpua005:322788:322860 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua005:322788:322860 [3] NCCL INFO Trees [0] -1/-1/-1->7->6 [1] -1/-1/-1->7->6 +gpua005:322788:322860 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0 +gpua005:322788:322860 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0 +gpua005:322788:322860 [3] NCCL INFO Connected all rings +gpua005:322788:322860 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 6[85000] via P2P/IPC/read +gpua005:322788:322860 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 6[85000] via P2P/IPC/read +gpua005:322788:322860 [3] NCCL INFO Connected all trees +gpua005:322788:322860 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua005:322788:322860 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua005:322788:322860 [3] NCCL INFO comm 0xb7586590 rank 7 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua005:322785:322785 [0] NCCL INFO cudaDriverVersion 12010 +gpua005:322785:322785 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.5<0> +gpua005:322785:322785 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua005:322785:322861 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.5<0> +gpua005:322785:322861 [0] NCCL INFO Using network IB +gpua005:322785:322861 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua005:322785:322861 [0] NCCL INFO Trees [0] 5/-1/-1->4->9 [1] 5/0/-1->4->12 +gpua005:322785:322861 [0] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0 +gpua005:322785:322861 [0] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0 +gpua005:322785:322861 [0] NCCL INFO Channel 00/0 : 4[7000] -> 5[46000] via P2P/IPC/read +gpua005:322785:322861 [0] NCCL INFO Channel 01/0 : 4[7000] -> 
5[46000] via P2P/IPC/read +gpua005:322785:322861 [0] NCCL INFO Connected all rings +gpua005:322785:322861 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [receive] via NET/IB/0 +gpua005:322785:322861 [0] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [send] via NET/IB/0 +gpua005:322785:322861 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [send] via NET/IB/0 +gpua005:322785:322861 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [receive] via NET/IB/0 +gpua005:322785:322861 [0] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [receive] via NET/IB/0 +gpua005:322785:322861 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [send] via NET/IB/0 +gpua005:322785:322861 [0] NCCL INFO Connected all trees +gpua005:322785:322861 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua005:322785:322861 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua005:322785:322861 [0] NCCL INFO comm 0xbdcfe00 rank 4 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpua005:322786:322786 [1] NCCL INFO cudaDriverVersion 12010 +gpua005:322786:322786 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.5<0> +gpua005:322786:322786 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua005:322786:322862 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.5<0> +gpua005:322786:322862 [1] NCCL INFO Using network IB +gpua005:322786:322862 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua005:322786:322862 [1] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 6/8/-1->5->4 +gpua005:322786:322862 [1] NCCL INFO Channel 00/0 : 5[46000] -> 6[85000] via P2P/IPC/read +gpua005:322786:322862 [1] NCCL INFO Channel 01/0 : 5[46000] -> 6[85000] via P2P/IPC/read +gpua005:322786:322862 [1] NCCL INFO Connected all rings +gpua005:322786:322862 [1] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [send] via NET/IB/0 +gpua005:322786:322862 [1] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [receive] via NET/IB/0 +gpua005:322786:322862 [1] NCCL INFO Channel 00/0 : 5[46000] -> 4[7000] via P2P/IPC/read +gpua005:322786:322862 [1] NCCL INFO Channel 01/0 : 5[46000] -> 4[7000] via P2P/IPC/read +gpua005:322786:322862 [1] NCCL INFO Connected all trees +gpua005:322786:322862 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua005:322786:322862 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua005:322786:322862 [1] NCCL INFO comm 0x9e527b50 rank 5 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua035:1685218:1685218 [2] NCCL INFO cudaDriverVersion 12010 +gpua035:1685218:1685218 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.35<0> +gpua035:1685218:1685218 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua035:1685218:1685292 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.35<0> +gpua035:1685218:1685292 [2] NCCL INFO Using network IB +gpua035:1685218:1685292 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpua035:1685218:1685292 [2] NCCL INFO Trees [0] 31/-1/-1->30->29 [1] 31/-1/-1->30->29 +gpua035:1685218:1685292 [2] NCCL INFO Channel 00/0 : 30[85000] -> 31[c7000] via P2P/IPC/read +gpua035:1685218:1685292 [2] NCCL INFO Channel 01/0 : 30[85000] -> 31[c7000] via P2P/IPC/read +gpua035:1685218:1685292 [2] NCCL INFO Connected all rings +gpua035:1685218:1685292 [2] NCCL INFO Channel 00/0 : 30[85000] -> 29[46000] via P2P/IPC/read +gpua035:1685218:1685292 [2] NCCL INFO Channel 01/0 : 30[85000] -> 29[46000] via P2P/IPC/read +gpua035:1685218:1685292 [2] NCCL INFO Connected 
all trees +gpua035:1685218:1685292 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua035:1685218:1685292 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua035:1685218:1685292 [2] NCCL INFO comm 0x5149e590 rank 30 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpua035:1685217:1685217 [1] NCCL INFO cudaDriverVersion 12010 +gpua035:1685217:1685217 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.35<0> +gpua035:1685217:1685217 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua035:1685217:1685295 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.35<0> +gpua035:1685217:1685295 [1] NCCL INFO Using network IB +gpua035:1685217:1685295 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpua035:1685217:1685295 [1] NCCL INFO Trees [0] 30/-1/-1->29->28 [1] 30/44/-1->29->28 +gpua035:1685217:1685295 [1] NCCL INFO Channel 00/0 : 29[46000] -> 30[85000] via P2P/IPC/read +gpua035:1685217:1685295 [1] NCCL INFO Channel 01/0 : 29[46000] -> 30[85000] via P2P/IPC/read +gpua035:1685217:1685295 [1] NCCL INFO Connected all rings +gpua035:1685217:1685295 [1] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [send] via NET/IB/0 +gpua035:1685217:1685295 [1] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [receive] via NET/IB/0 +gpua035:1685217:1685295 [1] NCCL INFO Channel 00/0 : 29[46000] -> 28[7000] via P2P/IPC/read +gpua035:1685217:1685295 [1] NCCL INFO Channel 01/0 : 29[46000] -> 28[7000] via P2P/IPC/read +gpua035:1685217:1685295 [1] NCCL INFO Connected all trees +gpua035:1685217:1685295 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua035:1685217:1685295 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua035:1685217:1685295 [1] NCCL INFO comm 0x94073350 rank 29 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpua035:1685219:1685219 [3] NCCL INFO cudaDriverVersion 12010 +gpua035:1685219:1685219 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.35<0> +gpua035:1685219:1685219 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua035:1685219:1685293 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.35<0> +gpua035:1685219:1685293 [3] NCCL INFO Using network IB +gpua035:1685219:1685293 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpua035:1685219:1685293 [3] NCCL INFO Trees [0] -1/-1/-1->31->30 [1] -1/-1/-1->31->30 +gpua035:1685219:1685293 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0 +gpua035:1685219:1685293 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0 +gpua035:1685219:1685293 [3] NCCL INFO Connected all rings +gpua035:1685219:1685293 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 30[85000] via P2P/IPC/read +gpua035:1685219:1685293 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 30[85000] via P2P/IPC/read +gpua035:1685219:1685293 [3] NCCL INFO Connected all trees +gpua035:1685219:1685293 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua035:1685219:1685293 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua035:1685219:1685293 [3] NCCL INFO comm 0x9d08f8e0 rank 31 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpua035:1685216:1685216 [0] NCCL INFO cudaDriverVersion 12010 +gpua035:1685216:1685216 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.35<0> +gpua035:1685216:1685216 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpua035:1685216:1685294 [0] NCCL INFO NET/IB : Using 
[0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.35<0> +gpua035:1685216:1685294 [0] NCCL INFO Using network IB +gpua035:1685216:1685294 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpua035:1685216:1685294 [0] NCCL INFO Trees [0] 29/-1/-1->28->24 [1] 29/12/-1->28->60 +gpua035:1685216:1685294 [0] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0 +gpua035:1685216:1685294 [0] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0 +gpua035:1685216:1685294 [0] NCCL INFO Channel 00/0 : 28[7000] -> 29[46000] via P2P/IPC/read +gpua035:1685216:1685294 [0] NCCL INFO Channel 01/0 : 28[7000] -> 29[46000] via P2P/IPC/read +gpua035:1685216:1685294 [0] NCCL INFO Connected all rings +gpua035:1685216:1685294 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [receive] via NET/IB/0 +gpua035:1685216:1685294 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [receive] via NET/IB/0 +gpua035:1685216:1685294 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [receive] via NET/IB/0 +gpua035:1685216:1685294 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [send] via NET/IB/0 +gpua035:1685216:1685294 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [send] via NET/IB/0 +gpua035:1685216:1685294 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [send] via NET/IB/0 +gpua035:1685216:1685294 [0] NCCL INFO Connected all trees +gpua035:1685216:1685294 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpua035:1685216:1685294 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpua035:1685216:1685294 [0] NCCL INFO comm 0x8b5a90d0 rank 28 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
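Every rank above ends with "Init COMPLETE", meaning the 64-rank NCCL communicator is fully wired: intra-node channels go over P2P/IPC, inter-node channels over NET/IB on the mlx5_0 RoCE port, arranged into the ring and tree topologies printed per rank. A minimal sketch of the kind of collective that now flows over those channels; this is a hypothetical standalone smoke test, not part of ESPnet, and the script name is invented:

import os
import torch
import torch.distributed as dist

# Hypothetical launch, matching this job's shape:
#   torchrun --nnodes=16 --nproc_per_node=4 nccl_check.py
# torchrun sets RANK, LOCAL_RANK, WORLD_SIZE, and MASTER_ADDR.
dist.init_process_group(backend="nccl")  # with NCCL_DEBUG=INFO, init emits topology lines like those above
local_rank = int(os.environ["LOCAL_RANK"])
torch.cuda.set_device(local_rank)

# One all-reduce, the gradient-averaging primitive DDP relies on,
# routed over the ring/tree channels NCCL just reported.
x = torch.ones(1, device="cuda") * dist.get_rank()
dist.all_reduce(x, op=dist.ReduceOp.SUM)

if dist.get_rank() == 0:
    # 0 + 1 + ... + 63 = 2016 for the 64 ranks in this run
    print(f"sum over {dist.get_world_size()} ranks = {x.item()}")
dist.destroy_process_group()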
If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. 
(function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. 
If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. 
(function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. 
If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. 
(function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
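The block above is PyTorch's DDP reducer warning; it is printed by every worker, which is why it repeats throughout the raw log of this 64-rank job. For context, here is a minimal sketch of the kind of construction the warning refers to. The model and the single-process setup are illustrative placeholders, not the ESPnet trainer's actual wrapping code, which does not appear in this log:

```python
import os
import torch
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel as DDP

# Single-process process group just so the sketch runs standalone; the real
# job is launched via SLURM/srun as shown at the top of this log.
os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
os.environ.setdefault("MASTER_PORT", "29500")
dist.init_process_group("gloo", rank=0, world_size=1)

model = torch.nn.Linear(10, 10)  # stand-in for the actual S2T model

# find_unused_parameters=True makes the DDP reducer traverse the autograd
# graph every iteration to mark parameters that received no gradient; that
# extra traversal is the overhead the warning describes.
ddp_model = DDP(model, find_unused_parameters=True)

# If every parameter really is used in every forward pass, the cheaper
# default (find_unused_parameters=False) avoids the traversal.
```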
+[gpua003:0/64] 2023-07-05 22:46:27,131 (trainer:732) INFO: 14epoch:train:1-100batch: iter_time=1.256, forward_time=0.181, loss_ctc=67.478, loss_att=50.061, acc=0.683, loss=55.286, backward_time=0.765, grad_norm=84.127, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.113, optim0_lr0=1.020e-04, train_time=5.776
+[gpua003:0/64] 2023-07-05 22:48:06,129 (trainer:732) INFO: 14epoch:train:101-200batch: iter_time=1.036e-04, forward_time=0.104, loss_ctc=76.315, loss_att=60.252, acc=0.659, loss=65.071, backward_time=0.747, grad_norm=106.131, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.020e-04, train_time=1.980
+[gpua003:0/64] 2023-07-05 22:49:44,933 (trainer:732) INFO: 14epoch:train:201-300batch: iter_time=1.112e-04, forward_time=0.104, loss_ctc=71.342, loss_att=53.820, acc=0.681, loss=59.077, backward_time=0.744, grad_norm=88.859, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.109, optim0_lr0=1.020e-04, train_time=1.976
+[gpua003:0/64] 2023-07-05 22:51:23,717 (trainer:732) INFO: 14epoch:train:301-400batch: iter_time=1.061e-04, forward_time=0.104, loss_ctc=74.278, loss_att=54.336, acc=0.672, loss=60.318, backward_time=0.744, grad_norm=83.344, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.019e-04, train_time=1.975
+[gpua003:0/64] 2023-07-05 22:53:02,383 (trainer:732) INFO: 14epoch:train:401-500batch: iter_time=9.902e-05, forward_time=0.104, loss_ctc=73.819, loss_att=59.568, acc=0.675, loss=63.843, backward_time=0.745, grad_norm=90.212, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.019e-04, train_time=1.973
+[gpua003:0/64] 2023-07-05 22:54:41,341 (trainer:732) INFO: 14epoch:train:501-600batch: iter_time=9.856e-05, forward_time=0.105, loss_ctc=67.201, loss_att=54.515, acc=0.666, loss=58.321, backward_time=0.746, grad_norm=85.901, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.018e-04, train_time=1.979
+[gpua003:0/64] 2023-07-05 22:56:20,255 (trainer:732) INFO: 14epoch:train:601-700batch: iter_time=9.906e-05, forward_time=0.105, loss_ctc=79.513, loss_att=65.577, acc=0.663, loss=69.758, backward_time=0.745, grad_norm=91.560, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.018e-04, train_time=1.978
+[gpua003:0/64] 2023-07-05 22:58:13,995 (trainer:732) INFO: 14epoch:train:701-800batch: iter_time=1.052e-04, forward_time=0.104, loss_ctc=86.261, loss_att=57.434, acc=0.687, loss=66.082, backward_time=0.756,
grad_norm=111.598, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.017e-04, train_time=2.275 +[gpua003:0/64] 2023-07-05 22:59:03,798 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpua003:0/64] 2023-07-05 22:59:22,487 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-05 22:59:25,999 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-05 22:59:26,000 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpua003:0/64] 2023-07-05 22:59:26,006 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-05 23:04:47,145 (trainer:732) INFO: 14epoch:train:801-900batch: iter_time=1.366, forward_time=0.106, loss_ctc=79.493, loss_att=56.962, acc=0.683, loss=63.721, backward_time=0.769, grad_norm=96.755, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.017e-04, train_time=7.863 +[gpua003:0/64] 2023-07-05 23:06:27,057 (trainer:732) INFO: 14epoch:train:901-1000batch: iter_time=1.071e-04, forward_time=0.107, loss_ctc=77.587, loss_att=65.112, acc=0.669, loss=68.855, backward_time=0.749, grad_norm=96.509, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.017e-04, train_time=1.998 +[gpua003:0/64] 2023-07-05 23:08:06,608 (trainer:732) INFO: 14epoch:train:1001-1100batch: iter_time=1.219e-04, forward_time=0.108, loss_ctc=69.740, loss_att=52.983, acc=0.695, loss=58.010, backward_time=0.747, grad_norm=83.010, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.016e-04, train_time=1.991 +[gpua003:0/64] 2023-07-05 23:09:46,058 (trainer:732) INFO: 14epoch:train:1101-1200batch: iter_time=9.900e-05, forward_time=0.107, loss_ctc=71.265, loss_att=51.955, acc=0.681, loss=57.748, backward_time=0.747, grad_norm=86.780, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.016e-04, train_time=1.989 +[gpua003:0/64] 2023-07-05 23:11:25,939 (trainer:732) INFO: 14epoch:train:1201-1300batch: iter_time=1.090e-04, forward_time=0.107, loss_ctc=72.245, loss_att=58.430, acc=0.682, loss=62.574, backward_time=0.748, grad_norm=82.034, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.015e-04, train_time=1.997 +[gpua003:0/64] 2023-07-05 23:13:05,201 (trainer:732) INFO: 14epoch:train:1301-1400batch: iter_time=1.137e-04, forward_time=0.106, loss_ctc=68.613, loss_att=56.255, acc=0.673, loss=59.963, backward_time=0.746, grad_norm=90.540, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.015e-04, train_time=1.985 +[gpua003:0/64] 2023-07-05 23:14:44,608 (trainer:732) INFO: 14epoch:train:1401-1500batch: iter_time=1.156e-04, forward_time=0.107, loss_ctc=75.103, loss_att=62.936, acc=0.675, loss=66.586, backward_time=0.747, grad_norm=91.101, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.014e-04, train_time=1.988 +[gpua003:0/64] 2023-07-05 23:16:23,886 (trainer:732) INFO: 14epoch:train:1501-1600batch: 
iter_time=1.146e-04, forward_time=0.107, loss_ctc=85.725, loss_att=59.152, acc=0.686, loss=67.124, backward_time=0.748, grad_norm=323.443, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.014e-04, train_time=1.985 +[gpua003:0/64] 2023-07-05 23:17:31,811 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpua003:0/64] 2023-07-05 23:17:50,852 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-05 23:17:54,347 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-05 23:17:54,348 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpua003:0/64] 2023-07-05 23:17:54,354 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-05 23:22:17,852 (trainer:732) INFO: 14epoch:train:1601-1700batch: iter_time=1.315, forward_time=0.107, loss_ctc=91.275, loss_att=63.464, acc=0.682, loss=71.807, backward_time=0.761, grad_norm=113.989, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.014e-04, train_time=7.079 +[gpua003:0/64] 2023-07-05 23:23:57,809 (trainer:732) INFO: 14epoch:train:1701-1800batch: iter_time=1.095e-04, forward_time=0.106, loss_ctc=65.126, loss_att=52.331, acc=0.668, loss=56.169, backward_time=0.746, grad_norm=83.364, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.013e-04, train_time=1.999 +[gpua003:0/64] 2023-07-05 23:25:37,561 (trainer:732) INFO: 14epoch:train:1801-1900batch: iter_time=1.136e-04, forward_time=0.105, loss_ctc=79.242, loss_att=61.858, acc=0.676, loss=67.073, backward_time=0.744, grad_norm=97.701, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.013e-04, train_time=1.995 +[gpua003:0/64] 2023-07-05 23:27:17,066 (trainer:732) INFO: 14epoch:train:1901-2000batch: iter_time=1.351e-04, forward_time=0.108, loss_ctc=65.033, loss_att=46.741, acc=0.696, loss=52.229, backward_time=0.748, grad_norm=72.732, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.012e-04, train_time=1.990 +[gpua003:0/64] 2023-07-05 23:28:56,259 (trainer:732) INFO: 14epoch:train:2001-2100batch: iter_time=1.376e-04, forward_time=0.107, loss_ctc=74.297, loss_att=55.693, acc=0.679, loss=61.274, backward_time=0.747, grad_norm=81.546, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.111, optim0_lr0=1.012e-04, train_time=1.984 +[gpua003:0/64] 2023-07-05 23:30:35,698 (trainer:732) INFO: 14epoch:train:2101-2200batch: iter_time=1.153e-04, forward_time=0.107, loss_ctc=69.547, loss_att=58.660, acc=0.668, loss=61.926, backward_time=0.747, grad_norm=98.590, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.012e-04, train_time=1.989 +[gpua003:0/64] 2023-07-05 23:32:14,941 (trainer:732) INFO: 14epoch:train:2201-2300batch: iter_time=1.191e-04, forward_time=0.105, loss_ctc=70.991, loss_att=61.134, acc=0.665, loss=64.091, backward_time=0.745, grad_norm=88.986, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.109, 
optim0_lr0=1.011e-04, train_time=1.985 +[gpua003:0/64] 2023-07-05 23:33:54,151 (trainer:732) INFO: 14epoch:train:2301-2400batch: iter_time=1.136e-04, forward_time=0.106, loss_ctc=78.668, loss_att=55.499, acc=0.691, loss=62.450, backward_time=0.744, grad_norm=94.472, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.109, optim0_lr0=1.011e-04, train_time=1.984 +[gpua003:0/64] 2023-07-05 23:35:33,588 (trainer:732) INFO: 14epoch:train:2401-2500batch: iter_time=1.069e-04, forward_time=0.106, loss_ctc=89.577, loss_att=66.586, acc=0.668, loss=73.483, backward_time=0.746, grad_norm=115.780, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.109, optim0_lr0=1.010e-04, train_time=1.989 +[gpua003:0/64] 2023-07-05 23:35:35,877 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpua003:0/64] 2023-07-05 23:35:54,848 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-05 23:35:58,368 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-05 23:35:58,368 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpua003:0/64] 2023-07-05 23:35:58,374 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-05 23:42:19,890 (trainer:732) INFO: 14epoch:train:2501-2600batch: iter_time=1.276, forward_time=0.106, loss_ctc=65.318, loss_att=47.624, acc=0.706, loss=52.932, backward_time=0.758, grad_norm=76.861, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.010e-04, train_time=8.126 +[gpua003:0/64] 2023-07-05 23:44:00,174 (trainer:732) INFO: 14epoch:train:2601-2700batch: iter_time=1.018e-04, forward_time=0.106, loss_ctc=74.292, loss_att=60.131, acc=0.672, loss=64.379, backward_time=0.748, grad_norm=91.235, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.010e-04, train_time=2.005 +[gpua003:0/64] 2023-07-05 23:45:39,635 (trainer:732) INFO: 14epoch:train:2701-2800batch: iter_time=9.909e-05, forward_time=0.106, loss_ctc=70.460, loss_att=53.646, acc=0.696, loss=58.690, backward_time=0.747, grad_norm=76.382, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.009e-04, train_time=1.989 +[gpua003:0/64] 2023-07-05 23:47:18,986 (trainer:732) INFO: 14epoch:train:2801-2900batch: iter_time=9.455e-05, forward_time=0.107, loss_ctc=72.107, loss_att=52.607, acc=0.679, loss=58.457, backward_time=0.746, grad_norm=92.055, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.009e-04, train_time=1.987 +[gpua003:0/64] 2023-07-05 23:48:58,340 (trainer:732) INFO: 14epoch:train:2901-3000batch: iter_time=8.975e-05, forward_time=0.106, loss_ctc=71.947, loss_att=58.354, acc=0.687, loss=62.432, backward_time=0.747, grad_norm=87.978, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.008e-04, train_time=1.987 +[gpua003:0/64] 2023-07-05 23:50:37,583 (trainer:732) INFO: 14epoch:train:3001-3100batch: iter_time=1.174e-04, forward_time=0.106, loss_ctc=67.838, loss_att=54.060, 
acc=0.680, loss=58.194, backward_time=0.746, grad_norm=87.646, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.008e-04, train_time=1.985 +[gpua003:0/64] 2023-07-05 23:52:16,968 (trainer:732) INFO: 14epoch:train:3101-3200batch: iter_time=9.872e-05, forward_time=0.106, loss_ctc=78.444, loss_att=65.330, acc=0.674, loss=69.264, backward_time=0.747, grad_norm=89.372, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.007e-04, train_time=1.987 +[gpua003:0/64] 2023-07-05 23:53:56,336 (trainer:732) INFO: 14epoch:train:3201-3300batch: iter_time=1.046e-04, forward_time=0.106, loss_ctc=84.969, loss_att=57.606, acc=0.694, loss=65.815, backward_time=0.746, grad_norm=110.526, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.007e-04, train_time=1.987 +[gpua003:0/64] 2023-07-05 23:54:31,552 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpua003:0/64] 2023-07-05 23:54:50,698 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-05 23:54:54,271 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-05 23:54:54,271 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpua003:0/64] 2023-07-05 23:54:54,277 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-05 23:59:43,333 (trainer:732) INFO: 14epoch:train:3301-3400batch: iter_time=1.303, forward_time=0.146, loss_ctc=74.612, loss_att=55.066, acc=0.684, loss=60.930, backward_time=0.760, grad_norm=98.484, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.111, optim0_lr0=1.007e-04, train_time=6.939 +[gpua003:0/64] 2023-07-06 00:01:23,508 (trainer:732) INFO: 14epoch:train:3401-3500batch: iter_time=9.903e-05, forward_time=0.106, loss_ctc=79.257, loss_att=64.273, acc=0.665, loss=68.768, backward_time=0.747, grad_norm=97.943, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.109, optim0_lr0=1.006e-04, train_time=2.004 +[gpua003:0/64] 2023-07-06 00:03:02,774 (trainer:732) INFO: 14epoch:train:3501-3600batch: iter_time=1.067e-04, forward_time=0.105, loss_ctc=69.162, loss_att=52.606, acc=0.690, loss=57.572, backward_time=0.744, grad_norm=91.125, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.109, optim0_lr0=1.006e-04, train_time=1.985 +[gpua003:0/64] 2023-07-06 00:04:42,112 (trainer:732) INFO: 14epoch:train:3601-3700batch: iter_time=1.060e-04, forward_time=0.105, loss_ctc=67.528, loss_att=48.483, acc=0.692, loss=54.197, backward_time=0.746, grad_norm=77.302, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.109, optim0_lr0=1.005e-04, train_time=1.987 +[gpua003:0/64] 2023-07-06 00:06:21,444 (trainer:732) INFO: 14epoch:train:3701-3800batch: iter_time=1.056e-04, forward_time=0.105, loss_ctc=70.824, loss_att=56.721, acc=0.686, loss=60.952, backward_time=0.745, grad_norm=88.267, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.005e-04, train_time=1.986 +[gpua003:0/64] 2023-07-06 00:08:00,672 
(trainer:732) INFO: 14epoch:train:3801-3900batch: iter_time=1.154e-04, forward_time=0.106, loss_ctc=64.923, loss_att=53.550, acc=0.673, loss=56.962, backward_time=0.746, grad_norm=87.728, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.005e-04, train_time=1.984 +[gpua003:0/64] 2023-07-06 00:09:40,070 (trainer:732) INFO: 14epoch:train:3901-4000batch: iter_time=1.270e-04, forward_time=0.106, loss_ctc=73.750, loss_att=61.788, acc=0.673, loss=65.377, backward_time=0.746, grad_norm=99.456, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.110, optim0_lr0=1.004e-04, train_time=1.988 +[gpua003:0/64] 2023-07-06 00:11:19,231 (trainer:732) INFO: 14epoch:train:4001-4100batch: iter_time=1.074e-04, forward_time=0.105, loss_ctc=83.056, loss_att=57.768, acc=0.689, loss=65.354, backward_time=0.744, grad_norm=118.212, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=1.004e-04, train_time=1.983 +[gpua003:0/64] 2023-07-06 00:12:39,831 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpua003:0/64] 2023-07-06 00:12:59,113 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 00:13:02,670 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 00:13:02,670 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpua003:0/64] 2023-07-06 00:13:02,687 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 00:16:52,797 (trainer:732) INFO: 14epoch:train:4101-4200batch: iter_time=2.223, forward_time=0.105, loss_ctc=86.387, loss_att=61.491, acc=0.682, loss=68.960, backward_time=0.756, grad_norm=109.255, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=1.003e-04, train_time=6.671 +[gpua003:0/64] 2023-07-06 00:18:32,838 (trainer:732) INFO: 14epoch:train:4201-4300batch: iter_time=9.583e-05, forward_time=0.105, loss_ctc=66.113, loss_att=52.338, acc=0.676, loss=56.471, backward_time=0.749, grad_norm=87.547, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=1.003e-04, train_time=2.001 +[gpua003:0/64] 2023-07-06 00:20:12,105 (trainer:732) INFO: 14epoch:train:4301-4400batch: iter_time=9.179e-05, forward_time=0.105, loss_ctc=74.526, loss_att=56.969, acc=0.689, loss=62.236, backward_time=0.745, grad_norm=94.528, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=1.003e-04, train_time=1.985 +[gpua003:0/64] 2023-07-06 00:21:52,782 (trainer:732) INFO: 14epoch:train:4401-4500batch: iter_time=1.001e-04, forward_time=0.105, loss_ctc=67.853, loss_att=50.469, acc=0.683, loss=55.684, backward_time=0.746, grad_norm=71.313, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=1.002e-04, train_time=2.013 +[gpua003:0/64] 2023-07-06 00:23:34,997 (trainer:732) INFO: 14epoch:train:4501-4600batch: iter_time=1.021e-04, forward_time=0.105, loss_ctc=70.434, loss_att=50.423, acc=0.692, loss=56.426, backward_time=0.747, grad_norm=81.773, clip=100.000, 
loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=1.002e-04, train_time=2.044 +[gpua003:0/64] 2023-07-06 00:25:14,224 (trainer:732) INFO: 14epoch:train:4601-4700batch: iter_time=9.233e-05, forward_time=0.104, loss_ctc=72.767, loss_att=60.956, acc=0.675, loss=64.499, backward_time=0.745, grad_norm=91.196, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=1.001e-04, train_time=1.984 +[gpua003:0/64] 2023-07-06 00:26:53,508 (trainer:732) INFO: 14epoch:train:4701-4800batch: iter_time=9.383e-05, forward_time=0.105, loss_ctc=67.376, loss_att=54.770, acc=0.670, loss=58.552, backward_time=0.746, grad_norm=96.216, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=1.001e-04, train_time=1.985 +[gpua003:0/64] 2023-07-06 00:28:32,809 (trainer:732) INFO: 14epoch:train:4801-4900batch: iter_time=1.024e-04, forward_time=0.105, loss_ctc=78.619, loss_att=59.814, acc=0.681, loss=65.456, backward_time=0.745, grad_norm=92.282, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=1.001e-04, train_time=1.986 +[gpua003:0/64] 2023-07-06 00:30:12,054 (trainer:732) INFO: 14epoch:train:4901-5000batch: iter_time=1.019e-04, forward_time=0.106, loss_ctc=85.585, loss_att=63.122, acc=0.681, loss=69.861, backward_time=0.744, grad_norm=103.405, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=1.000e-04, train_time=1.985 +[gpua003:0/64] 2023-07-06 00:30:14,272 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpua003:0/64] 2023-07-06 00:30:33,230 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 00:30:36,750 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 00:30:36,750 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpua003:0/64] 2023-07-06 00:30:36,757 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 00:35:36,135 (trainer:732) INFO: 14epoch:train:5001-5100batch: iter_time=1.279, forward_time=0.105, loss_ctc=64.965, loss_att=47.311, acc=0.710, loss=52.607, backward_time=0.755, grad_norm=80.711, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.998e-05, train_time=6.481 +[gpua003:0/64] 2023-07-06 00:37:16,313 (trainer:732) INFO: 14epoch:train:5101-5200batch: iter_time=1.019e-04, forward_time=0.105, loss_ctc=71.440, loss_att=58.275, acc=0.679, loss=62.225, backward_time=0.746, grad_norm=92.300, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.994e-05, train_time=2.003 +[gpua003:0/64] 2023-07-06 00:38:55,753 (trainer:732) INFO: 14epoch:train:5201-5300batch: iter_time=9.692e-05, forward_time=0.106, loss_ctc=68.311, loss_att=51.470, acc=0.704, loss=56.522, backward_time=0.746, grad_norm=109.860, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.990e-05, train_time=1.989 +[gpua003:0/64] 2023-07-06 00:40:35,087 (trainer:732) INFO: 14epoch:train:5301-5400batch: iter_time=9.289e-05, 
forward_time=0.106, loss_ctc=71.699, loss_att=52.171, acc=0.683, loss=58.029, backward_time=0.746, grad_norm=87.463, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.986e-05, train_time=1.986 +[gpua003:0/64] 2023-07-06 00:42:14,290 (trainer:732) INFO: 14epoch:train:5401-5500batch: iter_time=1.061e-04, forward_time=0.105, loss_ctc=71.095, loss_att=58.813, acc=0.688, loss=62.498, backward_time=0.744, grad_norm=80.801, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.982e-05, train_time=1.984 +[gpua003:0/64] 2023-07-06 00:43:53,453 (trainer:732) INFO: 14epoch:train:5501-5600batch: iter_time=1.011e-04, forward_time=0.105, loss_ctc=65.967, loss_att=52.829, acc=0.683, loss=56.770, backward_time=0.743, grad_norm=80.073, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.978e-05, train_time=1.983 +[gpua003:0/64] 2023-07-06 00:45:32,694 (trainer:732) INFO: 14epoch:train:5601-5700batch: iter_time=9.394e-05, forward_time=0.105, loss_ctc=76.086, loss_att=65.998, acc=0.677, loss=69.024, backward_time=0.745, grad_norm=84.296, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.974e-05, train_time=1.985 +[gpua003:0/64] 2023-07-06 00:47:11,756 (trainer:732) INFO: 14epoch:train:5701-5800batch: iter_time=1.005e-04, forward_time=0.104, loss_ctc=80.981, loss_att=57.081, acc=0.692, loss=64.251, backward_time=0.745, grad_norm=117.230, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.970e-05, train_time=1.981 +[gpua003:0/64] 2023-07-06 00:47:46,785 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpua003:0/64] 2023-07-06 00:48:05,668 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 00:48:09,217 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 00:48:09,217 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpua003:0/64] 2023-07-06 00:48:09,223 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 00:51:56,399 (trainer:732) INFO: 14epoch:train:5801-5900batch: iter_time=1.268, forward_time=0.105, loss_ctc=70.851, loss_att=51.948, acc=0.699, loss=57.619, backward_time=0.758, grad_norm=95.312, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.966e-05, train_time=5.693 +[gpua003:0/64] 2023-07-06 00:53:36,326 (trainer:732) INFO: 14epoch:train:5901-6000batch: iter_time=9.412e-05, forward_time=0.105, loss_ctc=74.049, loss_att=61.596, acc=0.687, loss=65.332, backward_time=0.745, grad_norm=83.262, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.962e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 00:55:15,556 (trainer:732) INFO: 14epoch:train:6001-6100batch: iter_time=9.442e-05, forward_time=0.105, loss_ctc=72.845, loss_att=54.112, acc=0.692, loss=59.732, backward_time=0.744, grad_norm=84.620, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.958e-05, 
train_time=1.984 +[gpua003:0/64] 2023-07-06 00:56:54,727 (trainer:732) INFO: 14epoch:train:6101-6200batch: iter_time=9.749e-05, forward_time=0.106, loss_ctc=62.931, loss_att=46.045, acc=0.705, loss=51.111, backward_time=0.744, grad_norm=75.046, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.954e-05, train_time=1.983 +[gpua003:0/64] 2023-07-06 00:58:34,088 (trainer:732) INFO: 14epoch:train:6201-6300batch: iter_time=9.675e-05, forward_time=0.106, loss_ctc=75.966, loss_att=57.343, acc=0.689, loss=62.930, backward_time=0.745, grad_norm=93.914, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.950e-05, train_time=1.987 +[gpua003:0/64] 2023-07-06 01:00:28,991 (trainer:732) INFO: 14epoch:train:6301-6400batch: iter_time=9.513e-05, forward_time=0.105, loss_ctc=65.892, loss_att=55.960, acc=0.686, loss=58.940, backward_time=0.769, grad_norm=98.285, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.946e-05, train_time=2.298 +[gpua003:0/64] 2023-07-06 01:02:08,399 (trainer:732) INFO: 14epoch:train:6401-6500batch: iter_time=9.649e-05, forward_time=0.106, loss_ctc=71.103, loss_att=59.408, acc=0.685, loss=62.916, backward_time=0.746, grad_norm=81.143, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.942e-05, train_time=1.988 +[gpua003:0/64] 2023-07-06 01:03:51,462 (trainer:732) INFO: 14epoch:train:6501-6600batch: iter_time=1.032e-04, forward_time=0.106, loss_ctc=81.907, loss_att=58.088, acc=0.700, loss=65.234, backward_time=0.749, grad_norm=98.608, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.938e-05, train_time=2.061 +[gpua003:0/64] 2023-07-06 01:04:59,329 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpua003:0/64] 2023-07-06 01:05:18,889 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 01:05:22,372 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 01:05:22,373 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpua003:0/64] 2023-07-06 01:05:22,379 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 01:10:37,392 (trainer:732) INFO: 14epoch:train:6601-6700batch: iter_time=1.280, forward_time=0.107, loss_ctc=81.131, loss_att=58.478, acc=0.688, loss=65.274, backward_time=0.755, grad_norm=114.707, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.935e-05, train_time=8.118 +[gpua003:0/64] 2023-07-06 01:12:18,338 (trainer:732) INFO: 14epoch:train:6701-6800batch: iter_time=1.129e-04, forward_time=0.106, loss_ctc=63.589, loss_att=52.978, acc=0.681, loss=56.162, backward_time=0.750, grad_norm=83.786, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.931e-05, train_time=2.019 +[gpua003:0/64] 2023-07-06 01:13:57,605 (trainer:732) INFO: 14epoch:train:6801-6900batch: iter_time=1.194e-04, forward_time=0.106, loss_ctc=73.216, loss_att=56.431, acc=0.697, loss=61.466, 
backward_time=0.746, grad_norm=85.113, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.927e-05, train_time=1.985 +[gpua003:0/64] 2023-07-06 01:15:36,756 (trainer:732) INFO: 14epoch:train:6901-7000batch: iter_time=1.192e-04, forward_time=0.106, loss_ctc=66.478, loss_att=48.895, acc=0.692, loss=54.170, backward_time=0.745, grad_norm=88.642, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.923e-05, train_time=1.983 +[gpua003:0/64] 2023-07-06 01:17:15,887 (trainer:732) INFO: 14epoch:train:7001-7100batch: iter_time=1.179e-04, forward_time=0.106, loss_ctc=70.266, loss_att=50.382, acc=0.693, loss=56.347, backward_time=0.746, grad_norm=84.071, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.919e-05, train_time=1.982 +[gpua003:0/64] 2023-07-06 01:18:54,896 (trainer:732) INFO: 14epoch:train:7101-7200batch: iter_time=1.187e-04, forward_time=0.105, loss_ctc=73.012, loss_att=61.557, acc=0.673, loss=64.994, backward_time=0.746, grad_norm=89.483, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.915e-05, train_time=1.980 +[gpua003:0/64] 2023-07-06 01:20:34,275 (trainer:732) INFO: 14epoch:train:7201-7300batch: iter_time=1.036e-04, forward_time=0.106, loss_ctc=65.363, loss_att=55.629, acc=0.669, loss=58.549, backward_time=0.746, grad_norm=89.409, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.911e-05, train_time=1.987 +[gpua003:0/64] 2023-07-06 01:22:13,430 (trainer:732) INFO: 14epoch:train:7301-7400batch: iter_time=1.061e-04, forward_time=0.105, loss_ctc=78.086, loss_att=58.693, acc=0.692, loss=64.511, backward_time=0.744, grad_norm=93.347, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.907e-05, train_time=1.983 +[gpua003:0/64] 2023-07-06 01:23:52,443 (trainer:732) INFO: 14epoch:train:7401-7500batch: iter_time=1.255e-04, forward_time=0.105, loss_ctc=85.073, loss_att=62.105, acc=0.679, loss=68.995, backward_time=0.745, grad_norm=99.476, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.903e-05, train_time=1.980 +[gpua003:0/64] 2023-07-06 01:23:53,863 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
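An aside on reading these entries: the logged loss is consistent with the usual hybrid CTC/attention objective, loss = ctc_weight * loss_ctc + (1 - ctc_weight) * loss_att with ctc_weight = 0.3. The weight is inferred from the logged numbers; the training config itself is not shown in this log. A quick check against two entries from this epoch:

```python
# Hedged sanity check that the logged "loss" column is a CTC/attention
# mixture; ctc_weight=0.3 is inferred from the values, not read from config.
def hybrid_loss(loss_ctc: float, loss_att: float, ctc_weight: float = 0.3) -> float:
    """loss = w * loss_ctc + (1 - w) * loss_att, the standard hybrid objective."""
    return ctc_weight * loss_ctc + (1.0 - ctc_weight) * loss_att

# Values from the 14epoch:train:7401-7500batch entry above:
assert abs(hybrid_loss(85.073, 62.105) - 68.995) < 5e-3
# Values from the first entry of epoch 14 (1-100batch):
assert abs(hybrid_loss(67.478, 50.061) - 55.286) < 5e-3
```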
+[gpua003:0/64] 2023-07-06 01:24:12,946 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 01:24:16,467 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 01:24:16,467 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpua003:0/64] 2023-07-06 01:24:16,474 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 01:28:56,627 (trainer:732) INFO: 14epoch:train:7501-7600batch: iter_time=1.287, forward_time=0.106, loss_ctc=68.678, loss_att=50.359, acc=0.697, loss=55.855, backward_time=0.755, grad_norm=80.423, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.899e-05, train_time=6.083 +[gpua003:0/64] 2023-07-06 01:30:36,072 (trainer:732) INFO: 14epoch:train:7601-7700batch: iter_time=1.040e-04, forward_time=0.106, loss_ctc=68.907, loss_att=57.070, acc=0.685, loss=60.621, backward_time=0.745, grad_norm=91.649, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.896e-05, train_time=1.989 +[gpua003:0/64] 2023-07-06 01:32:15,336 (trainer:732) INFO: 14epoch:train:7701-7800batch: iter_time=1.071e-04, forward_time=0.106, loss_ctc=70.050, loss_att=51.852, acc=0.696, loss=57.312, backward_time=0.744, grad_norm=83.173, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.892e-05, train_time=1.985 +[gpua003:0/64] 2023-07-06 01:33:54,551 (trainer:732) INFO: 14epoch:train:7801-7900batch: iter_time=9.941e-05, forward_time=0.106, loss_ctc=73.006, loss_att=51.646, acc=0.691, loss=58.054, backward_time=0.745, grad_norm=86.960, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.109, optim0_lr0=9.888e-05, train_time=1.984 +[gpua003:0/64] 2023-07-06 01:35:34,182 (trainer:732) INFO: 14epoch:train:7901-8000batch: iter_time=8.641e-05, forward_time=0.107, loss_ctc=70.408, loss_att=60.834, acc=0.686, loss=63.707, backward_time=0.748, grad_norm=102.961, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=9.884e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 01:37:13,489 (trainer:732) INFO: 14epoch:train:8001-8100batch: iter_time=1.062e-04, forward_time=0.107, loss_ctc=66.066, loss_att=55.633, acc=0.676, loss=58.763, backward_time=0.747, grad_norm=84.748, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, optim0_lr0=9.880e-05, train_time=1.986 +[gpua003:0/64] 2023-07-06 01:38:55,993 (trainer:732) INFO: 14epoch:train:8101-8200batch: iter_time=1.083e-04, forward_time=0.107, loss_ctc=77.571, loss_att=61.968, acc=0.687, loss=66.649, backward_time=0.748, grad_norm=91.196, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, optim0_lr0=9.876e-05, train_time=2.050 +[gpua003:0/64] 2023-07-06 01:40:35,417 (trainer:732) INFO: 14epoch:train:8201-8300batch: iter_time=9.520e-05, forward_time=0.107, loss_ctc=80.442, loss_att=55.920, acc=0.694, loss=63.276, backward_time=0.747, grad_norm=82.962, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, 
optim0_lr0=9.872e-05, train_time=1.988 +[gpua003:0/64] 2023-07-06 01:41:10,610 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpua003:0/64] 2023-07-06 01:41:29,651 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 01:41:33,072 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 01:41:33,072 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpua003:0/64] 2023-07-06 01:41:33,078 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 01:46:45,702 (trainer:732) INFO: 14epoch:train:8301-8400batch: iter_time=1.290, forward_time=0.120, loss_ctc=72.247, loss_att=53.015, acc=0.698, loss=58.785, backward_time=0.761, grad_norm=82.758, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.113, optim0_lr0=9.869e-05, train_time=7.405 +[gpua003:0/64] 2023-07-06 01:48:26,447 (trainer:732) INFO: 14epoch:train:8401-8500batch: iter_time=1.170e-04, forward_time=0.105, loss_ctc=72.826, loss_att=61.567, acc=0.673, loss=64.945, backward_time=0.746, grad_norm=92.862, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, optim0_lr0=9.865e-05, train_time=2.015 +[gpua003:0/64] 2023-07-06 01:50:06,144 (trainer:732) INFO: 14epoch:train:8501-8600batch: iter_time=1.142e-04, forward_time=0.105, loss_ctc=71.755, loss_att=54.026, acc=0.689, loss=59.345, backward_time=0.745, grad_norm=84.483, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.109, optim0_lr0=9.861e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 01:51:45,693 (trainer:732) INFO: 14epoch:train:8601-8700batch: iter_time=1.203e-04, forward_time=0.106, loss_ctc=62.700, loss_att=45.802, acc=0.702, loss=50.872, backward_time=0.746, grad_norm=76.913, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.109, optim0_lr0=9.857e-05, train_time=1.991 +[gpua003:0/64] 2023-07-06 01:53:38,865 (trainer:732) INFO: 14epoch:train:8701-8800batch: iter_time=1.071e-04, forward_time=0.106, loss_ctc=74.653, loss_att=55.710, acc=0.688, loss=61.393, backward_time=0.773, grad_norm=129.792, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, optim0_lr0=9.853e-05, train_time=2.263 +[gpua003:0/64] 2023-07-06 01:55:18,104 (trainer:732) INFO: 14epoch:train:8801-8900batch: iter_time=1.105e-04, forward_time=0.106, loss_ctc=64.757, loss_att=56.246, acc=0.674, loss=58.799, backward_time=0.744, grad_norm=80.970, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, optim0_lr0=9.849e-05, train_time=1.985 +[gpua003:0/64] 2023-07-06 01:56:59,672 (trainer:732) INFO: 14epoch:train:8901-9000batch: iter_time=1.077e-04, forward_time=0.106, loss_ctc=70.334, loss_att=57.944, acc=0.678, loss=61.661, backward_time=0.751, grad_norm=83.367, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.109, optim0_lr0=9.846e-05, train_time=2.031 +[gpua003:0/64] 2023-07-06 01:58:38,903 (trainer:732) INFO: 14epoch:train:9001-9100batch: iter_time=1.082e-04, forward_time=0.106, loss_ctc=80.565, loss_att=57.072, 
acc=0.700, loss=64.119, backward_time=0.745, grad_norm=93.217, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.109, optim0_lr0=9.842e-05, train_time=1.984 +[gpua003:0/64] 2023-07-06 01:59:52,859 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpua003:0/64] 2023-07-06 02:00:11,825 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 02:00:15,305 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 02:00:15,305 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpua003:0/64] 2023-07-06 02:00:15,311 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 02:04:51,398 (trainer:732) INFO: 14epoch:train:9101-9200batch: iter_time=1.321, forward_time=0.157, loss_ctc=81.944, loss_att=58.952, acc=0.689, loss=65.850, backward_time=0.767, grad_norm=105.225, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.114, optim0_lr0=9.838e-05, train_time=7.449 +[gpua003:0/64] 2023-07-06 02:06:31,077 (trainer:732) INFO: 14epoch:train:9201-9300batch: iter_time=1.023e-04, forward_time=0.105, loss_ctc=66.026, loss_att=53.515, acc=0.692, loss=57.268, backward_time=0.747, grad_norm=86.498, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, optim0_lr0=9.834e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 02:08:11,806 (trainer:732) INFO: 14epoch:train:9301-9400batch: iter_time=9.862e-05, forward_time=0.106, loss_ctc=73.964, loss_att=57.337, acc=0.700, loss=62.325, backward_time=0.745, grad_norm=85.275, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, optim0_lr0=9.830e-05, train_time=2.014 +[gpua003:0/64] 2023-07-06 02:09:51,859 (trainer:732) INFO: 14epoch:train:9401-9500batch: iter_time=1.055e-04, forward_time=0.106, loss_ctc=64.980, loss_att=48.354, acc=0.695, loss=53.342, backward_time=0.745, grad_norm=74.030, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, optim0_lr0=9.827e-05, train_time=2.001 +[gpua003:0/64] 2023-07-06 02:11:31,288 (trainer:732) INFO: 14epoch:train:9501-9600batch: iter_time=1.036e-04, forward_time=0.106, loss_ctc=70.565, loss_att=51.030, acc=0.695, loss=56.890, backward_time=0.744, grad_norm=84.506, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, optim0_lr0=9.823e-05, train_time=1.988 +[gpua003:0/64] 2023-07-06 02:13:10,467 (trainer:732) INFO: 14epoch:train:9601-9700batch: iter_time=1.057e-04, forward_time=0.106, loss_ctc=71.513, loss_att=60.661, acc=0.691, loss=63.916, backward_time=0.745, grad_norm=87.016, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, optim0_lr0=9.819e-05, train_time=1.983 +[gpua003:0/64] 2023-07-06 02:14:49,595 (trainer:732) INFO: 14epoch:train:9701-9800batch: iter_time=1.059e-04, forward_time=0.106, loss_ctc=65.538, loss_att=54.445, acc=0.682, loss=57.773, backward_time=0.744, grad_norm=94.902, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.110, optim0_lr0=9.815e-05, train_time=1.982 +[gpua003:0/64] 2023-07-06 02:16:28,940 
(trainer:732) INFO: 14epoch:train:9801-9900batch: iter_time=1.015e-04, forward_time=0.107, loss_ctc=77.709, loss_att=58.250, acc=0.701, loss=64.087, backward_time=0.745, grad_norm=110.894, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.109, optim0_lr0=9.811e-05, train_time=1.987
+[gpua003:0/64] 2023-07-06 02:18:08,171 (trainer:732) INFO: 14epoch:train:9901-10000batch: iter_time=9.460e-05, forward_time=0.106, loss_ctc=82.982, loss_att=61.283, acc=0.688, loss=67.793, backward_time=0.744, grad_norm=103.022, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.109, optim0_lr0=9.808e-05, train_time=1.984
+[gpua003:0/64] 2023-07-06 02:32:00,423 (trainer:338) INFO: 14epoch results: [train] iter_time=0.165, forward_time=0.108, loss_ctc=73.204, loss_att=56.354, acc=0.685, loss=61.409, backward_time=0.748, grad_norm=93.679, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.110, optim0_lr0=1.000e-04, train_time=2.598, time=3 hours, 36 minutes and 52.71 seconds, total_count=110000, gpu_max_cached_mem_GB=34.473, [valid] loss_ctc=52.779, cer_ctc=0.299, loss_att=43.314, acc=0.648, cer=0.406, wer=0.989, loss=46.153, time=7 minutes and 15.36 seconds, total_count=11638, gpu_max_cached_mem_GB=37.768, [att_plot] time=6 minutes and 14.08 seconds, total_count=0, gpu_max_cached_mem_GB=37.768
+[gpua003:0/64] 2023-07-06 02:32:18,801 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count
+[gpua003:0/64] 2023-07-06 02:32:18,908 (trainer:272) INFO: 15/100epoch started. Estimated time to finish: 1 week, 6 days and 18 hours
+[gpua003:0/64] 2023-07-06 02:32:19,981 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpua003:0/64] 2023-07-06 02:32:39,156 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 02:32:42,632 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 02:32:42,633 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpua003:0/64] 2023-07-06 02:32:42,673 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 02:39:26,949 (trainer:732) INFO: 15epoch:train:1-100batch: iter_time=3.199, forward_time=0.158, loss_ctc=82.906, loss_att=63.268, acc=0.672, loss=69.159, backward_time=0.766, grad_norm=96.452, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.118, optim0_lr0=9.804e-05, train_time=8.551
+[gpua003:0/64] 2023-07-06 02:41:12,530 (trainer:732) INFO: 15epoch:train:101-200batch: iter_time=1.083e-04, forward_time=0.110, loss_ctc=93.463, loss_att=58.148, acc=0.684, loss=68.742, backward_time=0.761, grad_norm=99.343, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.113, optim0_lr0=9.800e-05, train_time=2.112
+[gpua003:0/64] 2023-07-06 02:42:58,616 (trainer:732) INFO: 15epoch:train:201-300batch: iter_time=1.075e-04, forward_time=0.109, loss_ctc=72.287, loss_att=52.500, acc=0.677, loss=58.436, backward_time=0.756, grad_norm=100.014, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.113, optim0_lr0=9.796e-05, train_time=2.122
+[gpua003:0/64] 2023-07-06 02:44:49,824 (trainer:732) INFO: 15epoch:train:301-400batch: iter_time=1.050e-04, forward_time=0.108, loss_ctc=74.935, loss_att=63.168, acc=0.669, loss=66.698, backward_time=0.769, grad_norm=93.180, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.113, optim0_lr0=9.793e-05, train_time=2.224
+[gpua003:0/64] 2023-07-06 02:46:40,595 (trainer:732) INFO: 15epoch:train:401-500batch: iter_time=1.069e-04, forward_time=0.109, loss_ctc=84.885, loss_att=68.005, acc=0.654, loss=73.069, backward_time=0.761, grad_norm=120.148, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.114, optim0_lr0=9.789e-05, train_time=2.215
+[gpua003:0/64] 2023-07-06 02:48:27,207 (trainer:732) INFO: 15epoch:train:501-600batch: iter_time=1.120e-04, forward_time=0.116, loss_ctc=88.432, loss_att=68.337, acc=0.676, loss=74.366, backward_time=0.760, grad_norm=91.380, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.113, optim0_lr0=9.785e-05, train_time=2.132
+[gpua003:0/64] 2023-07-06 02:50:13,816 (trainer:732) INFO: 15epoch:train:601-700batch: iter_time=1.091e-04, forward_time=0.109, loss_ctc=76.229, loss_att=54.425, acc=0.696, loss=60.966, backward_time=0.767, grad_norm=83.791, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.113, optim0_lr0=9.781e-05, train_time=2.132
+[gpua003:0/64] 2023-07-06 02:52:11,799 (trainer:732) INFO: 15epoch:train:701-800batch: iter_time=1.037e-04, forward_time=0.120, loss_ctc=79.546, loss_att=62.351, acc=0.675, loss=67.509, backward_time=0.782, grad_norm=103.551, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.113, optim0_lr0=9.778e-05, train_time=2.359
+[gpua003:0/64] 2023-07-06 02:53:02,111 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpua003:0/64] 2023-07-06 02:53:21,354 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 02:53:24,837 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 02:53:24,837 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpua003:0/64] 2023-07-06 02:53:24,887 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 02:58:05,256 (trainer:732) INFO: 15epoch:train:801-900batch: iter_time=1.628, forward_time=0.133, loss_ctc=82.093, loss_att=59.999, acc=0.678, loss=66.627, backward_time=0.769, grad_norm=104.738, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.113, optim0_lr0=9.774e-05, train_time=7.069
+[gpua003:0/64] 2023-07-06 02:59:46,184 (trainer:732) INFO: 15epoch:train:901-1000batch: iter_time=1.123e-04, forward_time=0.108, loss_ctc=77.407, loss_att=56.196, acc=0.685, loss=62.559, backward_time=0.752, grad_norm=95.582, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.112, optim0_lr0=9.770e-05, train_time=2.018
+[gpua003:0/64] 2023-07-06 03:01:26,322 (trainer:732) INFO: 15epoch:train:1001-1100batch: iter_time=9.835e-05, forward_time=0.107, loss_ctc=85.077, loss_att=54.745, acc=0.695, loss=63.845, backward_time=0.755, grad_norm=87.655, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.112, optim0_lr0=9.766e-05, train_time=2.003
+[gpua003:0/64] 2023-07-06 03:03:06,138 (trainer:732) INFO: 15epoch:train:1101-1200batch: iter_time=8.910e-05, forward_time=0.106, loss_ctc=71.804, loss_att=57.490, acc=0.664, loss=61.784, backward_time=0.752, grad_norm=96.520, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.112, optim0_lr0=9.763e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 03:04:46,029 (trainer:732) INFO: 15epoch:train:1201-1300batch: iter_time=1.024e-04, forward_time=0.107, loss_ctc=78.767, loss_att=64.261, acc=0.679, loss=68.613, backward_time=0.752, grad_norm=95.553, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.112, optim0_lr0=9.759e-05, train_time=1.998
+[gpua003:0/64] 2023-07-06 03:06:25,781 (trainer:732) INFO: 15epoch:train:1301-1400batch: iter_time=9.587e-05, forward_time=0.106, loss_ctc=89.123, loss_att=69.314, acc=0.678, loss=75.257, backward_time=0.752, grad_norm=99.925, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.112, optim0_lr0=9.755e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 03:08:05,655 (trainer:732) INFO: 15epoch:train:1401-1500batch: iter_time=9.323e-05, forward_time=0.106, loss_ctc=77.240, loss_att=56.417, acc=0.693, loss=62.664, backward_time=0.752, grad_norm=92.784, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.111, optim0_lr0=9.751e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 03:09:45,213 (trainer:732) INFO: 15epoch:train:1501-1600batch: iter_time=9.074e-05, forward_time=0.105, loss_ctc=76.823, loss_att=57.859, acc=0.677, loss=63.548, backward_time=0.751, grad_norm=99.313, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.112, optim0_lr0=9.748e-05, train_time=1.991
+[gpua003:0/64] 2023-07-06 03:10:52,700 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpua003:0/64] 2023-07-06 03:11:11,991 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 03:11:15,554 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 03:11:15,554 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpua003:0/64] 2023-07-06 03:11:15,561 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 03:16:19,134 (trainer:732) INFO: 15epoch:train:1601-1700batch: iter_time=1.281, forward_time=0.107, loss_ctc=78.684, loss_att=60.055, acc=0.678, loss=65.643, backward_time=0.768, grad_norm=81.939, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.112, optim0_lr0=9.744e-05, train_time=7.878
+[gpua003:0/64] 2023-07-06 03:18:00,308 (trainer:732) INFO: 15epoch:train:1701-1800batch: iter_time=9.922e-05, forward_time=0.107, loss_ctc=79.008, loss_att=56.298, acc=0.696, loss=63.111, backward_time=0.753, grad_norm=81.521, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.111, optim0_lr0=9.740e-05, train_time=2.023
+[gpua003:0/64] 2023-07-06 03:19:40,539 (trainer:732) INFO: 15epoch:train:1801-1900batch: iter_time=9.836e-05, forward_time=0.108, loss_ctc=90.709, loss_att=57.184, acc=0.688, loss=67.242, backward_time=0.754, grad_norm=97.794, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.111, optim0_lr0=9.737e-05, train_time=2.004
+[gpua003:0/64] 2023-07-06 03:21:20,631 (trainer:732) INFO: 15epoch:train:1901-2000batch: iter_time=9.980e-05, forward_time=0.107, loss_ctc=71.072, loss_att=53.719, acc=0.682, loss=58.925, backward_time=0.751, grad_norm=83.439, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.111, optim0_lr0=9.733e-05, train_time=2.002
+[gpua003:0/64] 2023-07-06 03:23:00,483 (trainer:732) INFO: 15epoch:train:2001-2100batch: iter_time=9.705e-05, forward_time=0.107, loss_ctc=74.810, loss_att=63.359, acc=0.675, loss=66.794, backward_time=0.751, grad_norm=91.749, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.111, optim0_lr0=9.729e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 03:24:40,225 (trainer:732) INFO: 15epoch:train:2101-2200batch: iter_time=9.846e-05, forward_time=0.107, loss_ctc=86.087, loss_att=63.950, acc=0.671, loss=70.591, backward_time=0.751, grad_norm=104.916, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.111, optim0_lr0=9.726e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 03:26:19,997 (trainer:732) INFO: 15epoch:train:2201-2300batch: iter_time=1.004e-04, forward_time=0.107, loss_ctc=81.168, loss_att=65.843, acc=0.683, loss=70.440, backward_time=0.751, grad_norm=85.546, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.111, optim0_lr0=9.722e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 03:28:19,638 (trainer:732) INFO: 15epoch:train:2301-2400batch: iter_time=9.845e-05, forward_time=0.106, loss_ctc=75.986, loss_att=55.773, acc=0.686, loss=61.837, backward_time=0.780, grad_norm=89.935, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.111, optim0_lr0=9.718e-05, train_time=2.393
+[gpua003:0/64] 2023-07-06 03:30:11,150 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpua003:0/64] 2023-07-06 03:30:30,084 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 03:30:33,644 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 03:30:33,644 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpua003:0/64] 2023-07-06 03:30:33,650 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 03:33:45,898 (trainer:732) INFO: 15epoch:train:2401-2500batch: iter_time=1.301, forward_time=0.142, loss_ctc=78.818, loss_att=56.765, acc=0.693, loss=63.381, backward_time=0.782, grad_norm=91.105, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.113, optim0_lr0=9.715e-05, train_time=6.525
+[gpua003:0/64] 2023-07-06 03:35:27,576 (trainer:732) INFO: 15epoch:train:2501-2600batch: iter_time=1.159e-04, forward_time=0.110, loss_ctc=80.164, loss_att=60.607, acc=0.691, loss=66.474, backward_time=0.759, grad_norm=88.751, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.711e-05, train_time=2.034
+[gpua003:0/64] 2023-07-06 03:37:07,711 (trainer:732) INFO: 15epoch:train:2601-2700batch: iter_time=1.071e-04, forward_time=0.108, loss_ctc=89.216, loss_att=56.175, acc=0.699, loss=66.087, backward_time=0.752, grad_norm=94.279, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.707e-05, train_time=2.002
+[gpua003:0/64] 2023-07-06 03:38:47,535 (trainer:732) INFO: 15epoch:train:2701-2800batch: iter_time=1.003e-04, forward_time=0.107, loss_ctc=71.669, loss_att=52.232, acc=0.691, loss=58.063, backward_time=0.752, grad_norm=78.637, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.704e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 03:40:27,453 (trainer:732) INFO: 15epoch:train:2801-2900batch: iter_time=9.136e-05, forward_time=0.108, loss_ctc=72.118, loss_att=61.501, acc=0.688, loss=64.686, backward_time=0.752, grad_norm=86.278, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.700e-05, train_time=1.998
+[gpua003:0/64] 2023-07-06 03:42:07,650 (trainer:732) INFO: 15epoch:train:2901-3000batch: iter_time=9.709e-05, forward_time=0.108, loss_ctc=83.002, loss_att=63.713, acc=0.675, loss=69.500, backward_time=0.754, grad_norm=94.808, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.696e-05, train_time=2.004
+[gpua003:0/64] 2023-07-06 03:43:47,676 (trainer:732) INFO: 15epoch:train:3001-3100batch: iter_time=9.954e-05, forward_time=0.108, loss_ctc=85.122, loss_att=64.702, acc=0.694, loss=70.828, backward_time=0.754, grad_norm=92.367, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.693e-05, train_time=2.000
+[gpua003:0/64] 2023-07-06 03:45:30,195 (trainer:732) INFO: 15epoch:train:3101-3200batch: iter_time=1.035e-04, forward_time=0.108, loss_ctc=74.716, loss_att=52.707, acc=0.702, loss=59.310, backward_time=0.753, grad_norm=81.698, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.113, optim0_lr0=9.689e-05, train_time=2.050
+[gpua003:0/64] 2023-07-06 03:47:13,089 (trainer:732) INFO: 15epoch:train:3201-3300batch: iter_time=9.444e-05, forward_time=0.108, loss_ctc=77.867, loss_att=61.783, acc=0.691, loss=66.608, backward_time=0.759, grad_norm=98.749, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.685e-05, train_time=2.058
+[gpua003:0/64] 2023-07-06 03:47:53,832 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpua003:0/64] 2023-07-06 03:48:12,960 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 03:48:16,513 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 03:48:16,514 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpua003:0/64] 2023-07-06 03:48:16,520 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 03:53:07,778 (trainer:732) INFO: 15epoch:train:3301-3400batch: iter_time=1.891, forward_time=0.108, loss_ctc=80.670, loss_att=59.551, acc=0.693, loss=65.887, backward_time=0.768, grad_norm=91.581, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.682e-05, train_time=7.094
+[gpua003:0/64] 2023-07-06 03:54:48,045 (trainer:732) INFO: 15epoch:train:3401-3500batch: iter_time=1.099e-04, forward_time=0.109, loss_ctc=76.671, loss_att=54.588, acc=0.697, loss=61.213, backward_time=0.753, grad_norm=86.293, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.678e-05, train_time=2.005
+[gpua003:0/64] 2023-07-06 03:56:29,514 (trainer:732) INFO: 15epoch:train:3501-3600batch: iter_time=9.121e-05, forward_time=0.108, loss_ctc=83.637, loss_att=55.309, acc=0.702, loss=63.808, backward_time=0.753, grad_norm=89.277, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.674e-05, train_time=2.029
+[gpua003:0/64] 2023-07-06 03:58:09,348 (trainer:732) INFO: 15epoch:train:3601-3700batch: iter_time=9.560e-05, forward_time=0.108, loss_ctc=70.720, loss_att=55.269, acc=0.679, loss=59.904, backward_time=0.751, grad_norm=89.133, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.671e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 03:59:49,035 (trainer:732) INFO: 15epoch:train:3701-3800batch: iter_time=9.637e-05, forward_time=0.107, loss_ctc=77.221, loss_att=62.310, acc=0.690, loss=66.783, backward_time=0.751, grad_norm=96.452, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.111, optim0_lr0=9.667e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 04:01:31,489 (trainer:732) INFO: 15epoch:train:3801-3900batch: iter_time=1.006e-04, forward_time=0.107, loss_ctc=86.594, loss_att=65.894, acc=0.691, loss=72.104, backward_time=0.753, grad_norm=92.320, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.111, optim0_lr0=9.664e-05, train_time=2.049
+[gpua003:0/64] 2023-07-06 04:03:11,208 (trainer:732) INFO: 15epoch:train:3901-4000batch: iter_time=9.501e-05, forward_time=0.107, loss_ctc=78.169, loss_att=56.640, acc=0.693, loss=63.098, backward_time=0.750, grad_norm=89.150, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.660e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 04:04:51,122 (trainer:732) INFO: 15epoch:train:4001-4100batch: iter_time=9.624e-05, forward_time=0.107, loss_ctc=76.858, loss_att=56.884, acc=0.686, loss=62.876, backward_time=0.751, grad_norm=103.211, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.656e-05, train_time=1.998
+[gpua003:0/64] 2023-07-06 04:05:57,372 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpua003:0/64] 2023-07-06 04:06:16,333 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 04:06:19,864 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 04:06:19,864 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpua003:0/64] 2023-07-06 04:06:19,870 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 04:09:19,124 (trainer:732) INFO: 15epoch:train:4101-4200batch: iter_time=1.298, forward_time=0.107, loss_ctc=76.656, loss_att=57.378, acc=0.699, loss=63.161, backward_time=0.761, grad_norm=84.164, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.653e-05, train_time=5.360
+[gpua003:0/64] 2023-07-06 04:10:59,509 (trainer:732) INFO: 15epoch:train:4201-4300batch: iter_time=1.008e-04, forward_time=0.109, loss_ctc=77.716, loss_att=58.424, acc=0.694, loss=64.212, backward_time=0.754, grad_norm=86.815, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.649e-05, train_time=2.007
+[gpua003:0/64] 2023-07-06 04:12:39,763 (trainer:732) INFO: 15epoch:train:4301-4400batch: iter_time=1.001e-04, forward_time=0.108, loss_ctc=86.970, loss_att=57.258, acc=0.694, loss=66.171, backward_time=0.754, grad_norm=89.527, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.646e-05, train_time=2.005
+[gpua003:0/64] 2023-07-06 04:14:19,772 (trainer:732) INFO: 15epoch:train:4401-4500batch: iter_time=8.559e-05, forward_time=0.108, loss_ctc=70.095, loss_att=53.690, acc=0.681, loss=58.611, backward_time=0.754, grad_norm=78.614, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.642e-05, train_time=2.000
+[gpua003:0/64] 2023-07-06 04:15:59,517 (trainer:732) INFO: 15epoch:train:4501-4600batch: iter_time=9.013e-05, forward_time=0.107, loss_ctc=74.563, loss_att=61.596, acc=0.680, loss=65.486, backward_time=0.751, grad_norm=89.324, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.638e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 04:17:39,351 (trainer:732) INFO: 15epoch:train:4601-4700batch: iter_time=9.033e-05, forward_time=0.108, loss_ctc=83.925, loss_att=60.152, acc=0.693, loss=67.284, backward_time=0.752, grad_norm=90.456, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.635e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 04:19:19,250 (trainer:732) INFO: 15epoch:train:4701-4800batch: iter_time=9.522e-05, forward_time=0.108, loss_ctc=78.761, loss_att=63.200, acc=0.690, loss=67.868, backward_time=0.753, grad_norm=86.126, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.631e-05, train_time=1.998
+[gpua003:0/64] 2023-07-06 04:20:59,100 (trainer:732) INFO: 15epoch:train:4801-4900batch: iter_time=9.812e-05, forward_time=0.107, loss_ctc=74.851, loss_att=55.680, acc=0.694, loss=61.431, backward_time=0.752, grad_norm=103.100, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.628e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 04:22:39,137 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpua003:0/64] 2023-07-06 04:22:58,402 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 04:23:01,981 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 04:23:01,981 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpua003:0/64] 2023-07-06 04:23:01,987 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 04:26:15,035 (trainer:732) INFO: 15epoch:train:4901-5000batch: iter_time=1.298, forward_time=0.107, loss_ctc=79.639, loss_att=56.728, acc=0.693, loss=63.601, backward_time=0.757, grad_norm=91.688, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.624e-05, train_time=6.318
+[gpua003:0/64] 2023-07-06 04:27:57,014 (trainer:732) INFO: 15epoch:train:5001-5100batch: iter_time=1.188e-04, forward_time=0.110, loss_ctc=79.481, loss_att=59.825, acc=0.685, loss=65.722, backward_time=0.758, grad_norm=89.848, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.113, optim0_lr0=9.621e-05, train_time=2.039
+[gpua003:0/64] 2023-07-06 04:29:37,013 (trainer:732) INFO: 15epoch:train:5101-5200batch: iter_time=1.032e-04, forward_time=0.108, loss_ctc=86.315, loss_att=56.327, acc=0.690, loss=65.324, backward_time=0.752, grad_norm=105.057, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.617e-05, train_time=2.000
+[gpua003:0/64] 2023-07-06 04:31:16,972 (trainer:732) INFO: 15epoch:train:5201-5300batch: iter_time=1.038e-04, forward_time=0.106, loss_ctc=70.861, loss_att=50.758, acc=0.689, loss=56.789, backward_time=0.752, grad_norm=87.606, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.111, optim0_lr0=9.614e-05, train_time=1.999
+[gpua003:0/64] 2023-07-06 04:32:56,687 (trainer:732) INFO: 15epoch:train:5301-5400batch: iter_time=9.220e-05, forward_time=0.106, loss_ctc=71.546, loss_att=60.197, acc=0.685, loss=63.601, backward_time=0.750, grad_norm=91.661, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.111, optim0_lr0=9.610e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 04:34:36,595 (trainer:732) INFO: 15epoch:train:5401-5500batch: iter_time=9.018e-05, forward_time=0.107, loss_ctc=82.998, loss_att=65.769, acc=0.666, loss=70.938, backward_time=0.752, grad_norm=109.581, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.111, optim0_lr0=9.606e-05, train_time=1.998
+[gpua003:0/64] 2023-07-06 04:36:16,255 (trainer:732) INFO: 15epoch:train:5501-5600batch: iter_time=9.943e-05, forward_time=0.107, loss_ctc=82.860, loss_att=64.257, acc=0.691, loss=69.838, backward_time=0.750, grad_norm=91.728, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.111, optim0_lr0=9.603e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 04:37:55,929 (trainer:732) INFO: 15epoch:train:5601-5700batch: iter_time=9.638e-05, forward_time=0.106, loss_ctc=74.731, loss_att=53.015, acc=0.702, loss=59.530, backward_time=0.751, grad_norm=90.537, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.111, optim0_lr0=9.599e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 04:39:35,714 (trainer:732) INFO: 15epoch:train:5701-5800batch: iter_time=9.332e-05, forward_time=0.107, loss_ctc=76.746, loss_att=60.403, acc=0.686, loss=65.306, backward_time=0.751, grad_norm=100.897, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.596e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 04:40:08,943 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpua003:0/64] 2023-07-06 04:40:28,479 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 04:40:32,043 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 04:40:32,043 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpua003:0/64] 2023-07-06 04:40:32,049 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 04:44:18,911 (trainer:732) INFO: 15epoch:train:5801-5900batch: iter_time=1.331, forward_time=0.108, loss_ctc=75.757, loss_att=54.746, acc=0.692, loss=61.049, backward_time=0.767, grad_norm=90.029, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.592e-05, train_time=5.664
+[gpua003:0/64] 2023-07-06 04:45:59,455 (trainer:732) INFO: 15epoch:train:5901-6000batch: iter_time=1.005e-04, forward_time=0.107, loss_ctc=80.541, loss_att=59.034, acc=0.691, loss=65.486, backward_time=0.753, grad_norm=101.034, clip=100.000, loss_scale=5.629e+14, optim_step_time=0.112, optim0_lr0=9.589e-05, train_time=2.011
+[gpua003:0/64] 2023-07-06 04:47:39,684 (trainer:732) INFO: 15epoch:train:6001-6100batch: iter_time=9.840e-05, forward_time=0.107, loss_ctc=77.216, loss_att=50.560, acc=0.689, loss=58.557, backward_time=0.750, grad_norm=91.962, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.585e-05, train_time=2.004
+[gpua003:0/64] 2023-07-06 04:49:19,460 (trainer:732) INFO: 15epoch:train:6101-6200batch: iter_time=9.863e-05, forward_time=0.108, loss_ctc=70.142, loss_att=56.782, acc=0.679, loss=60.790, backward_time=0.751, grad_norm=87.925, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.582e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 04:50:59,293 (trainer:732) INFO: 15epoch:train:6201-6300batch: iter_time=9.683e-05, forward_time=0.107, loss_ctc=84.691, loss_att=67.527, acc=0.672, loss=72.676, backward_time=0.751, grad_norm=96.501, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.578e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 04:52:39,001 (trainer:732) INFO: 15epoch:train:6301-6400batch: iter_time=9.518e-05, forward_time=0.108, loss_ctc=78.908, loss_att=59.469, acc=0.696, loss=65.301, backward_time=0.751, grad_norm=90.460, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.575e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 04:54:18,553 (trainer:732) INFO: 15epoch:train:6401-6500batch: iter_time=1.032e-04, forward_time=0.108, loss_ctc=79.232, loss_att=59.510, acc=0.686, loss=65.427, backward_time=0.749, grad_norm=104.175, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.571e-05, train_time=1.991
+[gpua003:0/64] 2023-07-06 04:55:58,195 (trainer:732) INFO: 15epoch:train:6501-6600batch: iter_time=9.919e-05, forward_time=0.108, loss_ctc=74.355, loss_att=57.326, acc=0.685, loss=62.435, backward_time=0.750, grad_norm=89.442, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.568e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 04:57:05,281 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpua003:0/64] 2023-07-06 04:57:24,341 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 04:57:27,875 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 04:57:27,875 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpua003:0/64] 2023-07-06 04:57:27,881 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 05:03:01,395 (trainer:732) INFO: 15epoch:train:6601-6700batch: iter_time=1.280, forward_time=0.108, loss_ctc=78.273, loss_att=57.077, acc=0.685, loss=63.436, backward_time=0.759, grad_norm=85.784, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.564e-05, train_time=8.464
+[gpua003:0/64] 2023-07-06 05:04:42,396 (trainer:732) INFO: 15epoch:train:6701-6800batch: iter_time=1.146e-04, forward_time=0.110, loss_ctc=75.552, loss_att=54.344, acc=0.708, loss=60.707, backward_time=0.754, grad_norm=87.661, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.113, optim0_lr0=9.561e-05, train_time=2.020
+[gpua003:0/64] 2023-07-06 05:06:22,783 (trainer:732) INFO: 15epoch:train:6801-6900batch: iter_time=1.151e-04, forward_time=0.109, loss_ctc=87.760, loss_att=56.616, acc=0.702, loss=65.959, backward_time=0.754, grad_norm=109.179, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.557e-05, train_time=2.008
+[gpua003:0/64] 2023-07-06 05:08:02,835 (trainer:732) INFO: 15epoch:train:6901-7000batch: iter_time=1.119e-04, forward_time=0.110, loss_ctc=68.674, loss_att=52.588, acc=0.694, loss=57.414, backward_time=0.753, grad_norm=86.799, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.554e-05, train_time=2.001
+[gpua003:0/64] 2023-07-06 05:09:42,653 (trainer:732) INFO: 15epoch:train:7001-7100batch: iter_time=1.118e-04, forward_time=0.109, loss_ctc=73.621, loss_att=61.316, acc=0.689, loss=65.008, backward_time=0.752, grad_norm=105.693, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.550e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 05:11:22,495 (trainer:732) INFO: 15epoch:train:7101-7200batch: iter_time=1.103e-04, forward_time=0.109, loss_ctc=83.133, loss_att=61.505, acc=0.686, loss=67.994, backward_time=0.752, grad_norm=104.402, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.547e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 05:13:02,248 (trainer:732) INFO: 15epoch:train:7201-7300batch: iter_time=1.110e-04, forward_time=0.109, loss_ctc=81.010, loss_att=64.819, acc=0.692, loss=69.677, backward_time=0.751, grad_norm=110.286, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.543e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 05:14:42,319 (trainer:732) INFO: 15epoch:train:7301-7400batch: iter_time=1.107e-04, forward_time=0.109, loss_ctc=72.689, loss_att=54.067, acc=0.697, loss=59.654, backward_time=0.752, grad_norm=85.938, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.540e-05, train_time=2.001
+[gpua003:0/64] 2023-07-06 05:16:26,124 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpua003:0/64] 2023-07-06 05:16:45,225 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 05:16:48,776 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 05:16:48,777 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpua003:0/64] 2023-07-06 05:16:48,783 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 05:21:21,795 (trainer:732) INFO: 15epoch:train:7401-7500batch: iter_time=1.282, forward_time=0.110, loss_ctc=75.686, loss_att=56.849, acc=0.702, loss=62.500, backward_time=0.778, grad_norm=88.301, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.536e-05, train_time=7.989
+[gpua003:0/64] 2023-07-06 05:23:04,740 (trainer:732) INFO: 15epoch:train:7501-7600batch: iter_time=1.225e-04, forward_time=0.109, loss_ctc=74.838, loss_att=55.781, acc=0.699, loss=61.498, backward_time=0.759, grad_norm=93.290, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.113, optim0_lr0=9.533e-05, train_time=2.059
+[gpua003:0/64] 2023-07-06 05:24:45,328 (trainer:732) INFO: 15epoch:train:7601-7700batch: iter_time=1.022e-04, forward_time=0.110, loss_ctc=83.499, loss_att=54.322, acc=0.699, loss=63.075, backward_time=0.754, grad_norm=90.328, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.113, optim0_lr0=9.529e-05, train_time=2.012
+[gpua003:0/64] 2023-07-06 05:26:25,161 (trainer:732) INFO: 15epoch:train:7701-7800batch: iter_time=1.134e-04, forward_time=0.109, loss_ctc=71.384, loss_att=51.519, acc=0.703, loss=57.479, backward_time=0.751, grad_norm=86.506, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.113, optim0_lr0=9.526e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 05:28:04,981 (trainer:732) INFO: 15epoch:train:7801-7900batch: iter_time=1.131e-04, forward_time=0.109, loss_ctc=73.904, loss_att=64.610, acc=0.679, loss=67.398, backward_time=0.751, grad_norm=102.632, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.522e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 05:29:44,880 (trainer:732) INFO: 15epoch:train:7901-8000batch: iter_time=1.167e-04, forward_time=0.109, loss_ctc=83.085, loss_att=62.060, acc=0.679, loss=68.368, backward_time=0.753, grad_norm=103.977, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.113, optim0_lr0=9.519e-05, train_time=1.998
+[gpua003:0/64] 2023-07-06 05:31:24,806 (trainer:732) INFO: 15epoch:train:8001-8100batch: iter_time=1.085e-04, forward_time=0.109, loss_ctc=80.413, loss_att=61.210, acc=0.698, loss=66.971, backward_time=0.752, grad_norm=96.822, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.516e-05, train_time=1.998
+[gpua003:0/64] 2023-07-06 05:33:06,003 (trainer:732) INFO: 15epoch:train:8101-8200batch: iter_time=9.574e-05, forward_time=0.109, loss_ctc=75.570, loss_att=55.287, acc=0.694, loss=61.372, backward_time=0.753, grad_norm=88.815, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.113, optim0_lr0=9.512e-05, train_time=2.024
+[gpua003:0/64] 2023-07-06 05:34:48,433 (trainer:732) INFO: 15epoch:train:8201-8300batch: iter_time=1.065e-04, forward_time=0.110, loss_ctc=73.115, loss_att=57.381, acc=0.707, loss=62.101, backward_time=0.755, grad_norm=84.564, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.113, optim0_lr0=9.509e-05, train_time=2.048
+[gpua003:0/64] 2023-07-06 05:35:22,627 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpua003:0/64] 2023-07-06 05:35:42,157 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 05:35:45,995 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 05:35:45,995 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpua003:0/64] 2023-07-06 05:35:46,001 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 05:40:45,752 (trainer:732) INFO: 15epoch:train:8301-8400batch: iter_time=1.326, forward_time=0.109, loss_ctc=77.487, loss_att=56.217, acc=0.681, loss=62.598, backward_time=0.787, grad_norm=82.284, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.505e-05, train_time=7.146
+[gpua003:0/64] 2023-07-06 05:42:30,027 (trainer:732) INFO: 15epoch:train:8401-8500batch: iter_time=1.096e-04, forward_time=0.109, loss_ctc=81.840, loss_att=60.268, acc=0.694, loss=66.740, backward_time=0.763, grad_norm=98.195, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.502e-05, train_time=2.085
+[gpua003:0/64] 2023-07-06 05:44:15,543 (trainer:732) INFO: 15epoch:train:8501-8600batch: iter_time=1.047e-04, forward_time=0.108, loss_ctc=78.760, loss_att=51.979, acc=0.691, loss=60.013, backward_time=0.768, grad_norm=92.124, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.498e-05, train_time=2.110
+[gpua003:0/64] 2023-07-06 05:46:05,064 (trainer:732) INFO: 15epoch:train:8601-8700batch: iter_time=1.001e-04, forward_time=0.108, loss_ctc=70.996, loss_att=57.564, acc=0.676, loss=61.593, backward_time=0.777, grad_norm=91.103, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.495e-05, train_time=2.190
+[gpua003:0/64] 2023-07-06 05:47:51,510 (trainer:732) INFO: 15epoch:train:8701-8800batch: iter_time=1.110e-04, forward_time=0.108, loss_ctc=80.422, loss_att=66.412, acc=0.672, loss=70.615, backward_time=0.757, grad_norm=94.400, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.491e-05, train_time=2.129
+[gpua003:0/64] 2023-07-06 05:49:31,215 (trainer:732) INFO: 15epoch:train:8801-8900batch: iter_time=1.153e-04, forward_time=0.108, loss_ctc=78.907, loss_att=60.674, acc=0.693, loss=66.144, backward_time=0.750, grad_norm=89.498, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.488e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 05:51:11,993 (trainer:732) INFO: 15epoch:train:8901-9000batch: iter_time=1.134e-04, forward_time=0.108, loss_ctc=76.299, loss_att=56.694, acc=0.694, loss=62.575, backward_time=0.751, grad_norm=82.090, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.113, optim0_lr0=9.485e-05, train_time=2.015
+[gpua003:0/64] 2023-07-06 05:52:52,430 (trainer:732) INFO: 15epoch:train:9001-9100batch: iter_time=1.122e-04, forward_time=0.108, loss_ctc=74.360, loss_att=57.538, acc=0.689, loss=62.585, backward_time=0.750, grad_norm=87.996, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.481e-05, train_time=2.009
+[gpua003:0/64] 2023-07-06 05:54:01,666 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpua003:0/64] 2023-07-06 05:54:20,653 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 05:54:24,452 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 05:54:24,452 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpua003:0/64] 2023-07-06 05:54:24,459 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 05:58:56,052 (trainer:732) INFO: 15epoch:train:9101-9200batch: iter_time=1.330, forward_time=0.135, loss_ctc=76.652, loss_att=55.319, acc=0.689, loss=61.719, backward_time=0.760, grad_norm=94.798, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.113, optim0_lr0=9.478e-05, train_time=7.272
+[gpua003:0/64] 2023-07-06 06:00:39,277 (trainer:732) INFO: 15epoch:train:9201-9300batch: iter_time=9.552e-05, forward_time=0.111, loss_ctc=77.373, loss_att=56.510, acc=0.706, loss=62.769, backward_time=0.757, grad_norm=98.925, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.474e-05, train_time=2.064
+[gpua003:0/64] 2023-07-06 06:02:26,540 (trainer:732) INFO: 15epoch:train:9301-9400batch: iter_time=1.017e-04, forward_time=0.109, loss_ctc=86.609, loss_att=55.256, acc=0.704, loss=64.662, backward_time=0.763, grad_norm=94.931, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.471e-05, train_time=2.145
+[gpua003:0/64] 2023-07-06 06:04:09,380 (trainer:732) INFO: 15epoch:train:9401-9500batch: iter_time=1.045e-04, forward_time=0.109, loss_ctc=69.832, loss_att=52.386, acc=0.693, loss=57.620, backward_time=0.753, grad_norm=89.310, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.468e-05, train_time=2.057
+[gpua003:0/64] 2023-07-06 06:05:51,252 (trainer:732) INFO: 15epoch:train:9501-9600batch: iter_time=1.037e-04, forward_time=0.109, loss_ctc=73.533, loss_att=62.841, acc=0.688, loss=66.049, backward_time=0.751, grad_norm=93.939, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.464e-05, train_time=2.037
+[gpua003:0/64] 2023-07-06 06:07:41,819 (trainer:732) INFO: 15epoch:train:9601-9700batch: iter_time=1.076e-04, forward_time=0.108, loss_ctc=83.150, loss_att=60.639, acc=0.687, loss=67.393, backward_time=0.774, grad_norm=94.514, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.461e-05, train_time=2.211
+[gpua003:0/64] 2023-07-06 06:09:23,340 (trainer:732) INFO: 15epoch:train:9701-9800batch: iter_time=1.286e-04, forward_time=0.108, loss_ctc=81.189, loss_att=65.286, acc=0.694, loss=70.057, backward_time=0.752, grad_norm=92.569, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.457e-05, train_time=2.030
+[gpua003:0/64] 2023-07-06 06:11:03,236 (trainer:732) INFO: 15epoch:train:9801-9900batch: iter_time=1.122e-04, forward_time=0.107, loss_ctc=72.362, loss_att=53.624, acc=0.694, loss=59.246, backward_time=0.751, grad_norm=87.692, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.111, optim0_lr0=9.454e-05, train_time=1.998
+[gpua003:0/64] 2023-07-06 06:12:42,895 (trainer:732) INFO: 15epoch:train:9901-10000batch: iter_time=1.061e-04, forward_time=0.107, loss_ctc=75.625, loss_att=55.477, acc=0.704, loss=61.522, backward_time=0.750, grad_norm=86.254, clip=100.000, loss_scale=1.126e+15, optim_step_time=0.112, optim0_lr0=9.451e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 06:24:28,681 (trainer:338) INFO: 15epoch results: [train] iter_time=0.185, forward_time=0.110, loss_ctc=78.489, loss_att=58.649, acc=0.688, loss=64.601, backward_time=0.756, grad_norm=93.007, clip=100.000, loss_scale=7.318e+14, optim_step_time=0.112, optim0_lr0=9.624e-05, train_time=2.644, time=3 hours, 40 minutes and 32.98 seconds, total_count=120000, gpu_max_cached_mem_GB=37.770, [valid] loss_ctc=52.634, cer_ctc=0.298, loss_att=43.555, acc=0.657, cer=0.377, wer=0.991, loss=46.279, time=5 minutes and 28.62 seconds, total_count=12650, gpu_max_cached_mem_GB=37.770, [att_plot] time=6 minutes and 8.01 seconds, total_count=0, gpu_max_cached_mem_GB=37.770
+[gpua003:0/64] 2023-07-06 06:24:45,984 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count
+[gpua003:0/64] 2023-07-06 06:24:45,990 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/9epoch.pth, exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/10epoch.pth
+[gpua003:0/64] 2023-07-06 06:24:46,016 (trainer:272) INFO: 16/100epoch started. Estimated time to finish: 1 week, 6 days and 16 hours
+[gpua003:0/64] 2023-07-06 06:24:46,836 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpua003:0/64] 2023-07-06 06:25:05,794 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 06:25:10,550 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 06:25:10,550 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpua003:0/64] 2023-07-06 06:25:10,617 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 06:31:07,295 (trainer:732) INFO: 16epoch:train:1-100batch: iter_time=2.752, forward_time=0.134, loss_ctc=75.117, loss_att=48.716, acc=0.689, loss=56.636, backward_time=0.771, grad_norm=101.201, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.116, optim0_lr0=9.447e-05, train_time=7.615
+[gpua003:0/64] 2023-07-06 06:32:48,088 (trainer:732) INFO: 16epoch:train:101-200batch: iter_time=1.138e-04, forward_time=0.108, loss_ctc=74.028, loss_att=52.520, acc=0.697, loss=58.972, backward_time=0.754, grad_norm=94.459, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.444e-05, train_time=2.019
+[gpua003:0/64] 2023-07-06 06:34:29,724 (trainer:732) INFO: 16epoch:train:201-300batch: iter_time=1.056e-04, forward_time=0.109, loss_ctc=64.990, loss_att=51.033, acc=0.694, loss=55.220, backward_time=0.752, grad_norm=82.414, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.441e-05, train_time=2.033
+[gpua003:0/64] 2023-07-06 06:36:17,878 (trainer:732) INFO: 16epoch:train:301-400batch: iter_time=1.070e-04, forward_time=0.108, loss_ctc=76.120, loss_att=59.374, acc=0.683, loss=64.398, backward_time=0.760, grad_norm=93.942, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.437e-05, train_time=2.163
+[gpua003:0/64] 2023-07-06 06:38:00,826 (trainer:732) INFO: 16epoch:train:401-500batch: iter_time=1.101e-04, forward_time=0.107, loss_ctc=67.187, loss_att=49.787, acc=0.695, loss=55.007, backward_time=0.754, grad_norm=82.026, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.434e-05, train_time=2.059
+[gpua003:0/64] 2023-07-06 06:39:53,617 (trainer:732) INFO: 16epoch:train:501-600batch: iter_time=1.038e-04, forward_time=0.107, loss_ctc=81.351, loss_att=59.096, acc=0.689, loss=65.772, backward_time=0.772, grad_norm=90.956, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.431e-05, train_time=2.256
+[gpua003:0/64] 2023-07-06 06:41:43,234 (trainer:732) INFO: 16epoch:train:601-700batch: iter_time=9.752e-05, forward_time=0.108, loss_ctc=87.613, loss_att=64.737, acc=0.688, loss=71.600, backward_time=0.776, grad_norm=130.042, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.427e-05, train_time=2.192
+[gpua003:0/64] 2023-07-06 06:43:33,517 (trainer:732) INFO: 16epoch:train:701-800batch: iter_time=1.003e-04, forward_time=0.109, loss_ctc=72.526, loss_att=59.461, acc=0.682, loss=63.381, backward_time=0.768, grad_norm=92.190, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.424e-05, train_time=2.205
+[gpua003:0/64] 2023-07-06 06:44:14,263 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpua003:0/64] 2023-07-06 06:44:33,081 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 06:44:36,840 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 06:44:36,840 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpua003:0/64] 2023-07-06 06:44:36,846 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 06:48:21,733 (trainer:732) INFO: 16epoch:train:801-900batch: iter_time=1.321, forward_time=0.107, loss_ctc=80.667, loss_att=58.981, acc=0.688, loss=65.487, backward_time=0.768, grad_norm=96.967, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.420e-05, train_time=5.764
+[gpua003:0/64] 2023-07-06 06:50:06,977 (trainer:732) INFO: 16epoch:train:901-1000batch: iter_time=9.995e-05, forward_time=0.107, loss_ctc=71.004, loss_att=50.281, acc=0.699, loss=56.498, backward_time=0.758, grad_norm=96.342, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.417e-05, train_time=2.105
+[gpua003:0/64] 2023-07-06 06:51:47,247 (trainer:732) INFO: 16epoch:train:1001-1100batch: iter_time=1.007e-04, forward_time=0.107, loss_ctc=62.698, loss_att=47.891, acc=0.697, loss=52.333, backward_time=0.752, grad_norm=74.518, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.414e-05, train_time=2.005
+[gpua003:0/64] 2023-07-06 06:53:27,011 (trainer:732) INFO: 16epoch:train:1101-1200batch: iter_time=1.048e-04, forward_time=0.108, loss_ctc=75.410, loss_att=57.557, acc=0.687, loss=62.913, backward_time=0.751, grad_norm=83.906, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.410e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 06:55:06,788 (trainer:732) INFO: 16epoch:train:1201-1300batch: iter_time=1.035e-04, forward_time=0.107, loss_ctc=64.005, loss_att=47.055, acc=0.700, loss=52.140, backward_time=0.751, grad_norm=79.784, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.407e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 06:56:46,657 (trainer:732) INFO: 16epoch:train:1301-1400batch: iter_time=9.661e-05, forward_time=0.107, loss_ctc=81.759, loss_att=59.701, acc=0.678, loss=66.319, backward_time=0.751, grad_norm=92.222, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.404e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 06:58:26,430 (trainer:732) INFO: 16epoch:train:1401-1500batch: iter_time=9.571e-05, forward_time=0.107, loss_ctc=82.212, loss_att=60.520, acc=0.699, loss=67.027, backward_time=0.752, grad_norm=85.514, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.400e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 07:00:06,377 (trainer:732) INFO: 16epoch:train:1501-1600batch: iter_time=9.767e-05, forward_time=0.108, loss_ctc=70.485, loss_att=56.946, acc=0.687, loss=61.008, backward_time=0.752, grad_norm=93.511, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.397e-05, train_time=1.999
+[gpua003:0/64] 2023-07-06 07:01:14,601 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpua003:0/64] 2023-07-06 07:01:33,950 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 07:01:37,876 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 07:01:37,876 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpua003:0/64] 2023-07-06 07:01:37,882 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 07:05:32,440 (trainer:732) INFO: 16epoch:train:1601-1700batch: iter_time=1.290, forward_time=0.108, loss_ctc=76.855, loss_att=60.487, acc=0.696, loss=65.397, backward_time=0.761, grad_norm=104.000, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.394e-05, train_time=6.521
+[gpua003:0/64] 2023-07-06 07:07:12,660 (trainer:732) INFO: 16epoch:train:1701-1800batch: iter_time=9.317e-05, forward_time=0.107, loss_ctc=68.609, loss_att=46.164, acc=0.700, loss=52.897, backward_time=0.753, grad_norm=92.800, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.391e-05, train_time=2.004
+[gpua003:0/64] 2023-07-06 07:08:52,753 (trainer:732) INFO: 16epoch:train:1801-1900batch: iter_time=8.951e-05, forward_time=0.107, loss_ctc=67.542, loss_att=52.449, acc=0.694, loss=56.977, backward_time=0.752, grad_norm=85.811, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.112, optim0_lr0=9.387e-05, train_time=2.002
+[gpua003:0/64] 2023-07-06 07:10:32,527 (trainer:732) INFO: 16epoch:train:1901-2000batch: iter_time=8.930e-05, forward_time=0.107, loss_ctc=69.790, loss_att=55.626, acc=0.680, loss=59.875, backward_time=0.751, grad_norm=78.275, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.112, optim0_lr0=9.384e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 07:12:12,369 (trainer:732) INFO: 16epoch:train:2001-2100batch: iter_time=1.018e-04, forward_time=0.108, loss_ctc=69.942, loss_att=52.418, acc=0.688, loss=57.675, backward_time=0.752, grad_norm=83.432, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.381e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 07:13:52,056 (trainer:732) INFO: 16epoch:train:2101-2200batch: iter_time=1.072e-04, forward_time=0.108, loss_ctc=66.694, loss_att=50.173, acc=0.698, loss=55.129, backward_time=0.752, grad_norm=74.273, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.377e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 07:15:31,827 (trainer:732) INFO: 16epoch:train:2201-2300batch: iter_time=1.030e-04, forward_time=0.109, loss_ctc=86.699, loss_att=65.479, acc=0.678, loss=71.845, backward_time=0.753, grad_norm=112.678, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.374e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 07:17:11,502 (trainer:732) INFO: 16epoch:train:2301-2400batch: iter_time=1.009e-04, forward_time=0.109, loss_ctc=78.962, loss_att=61.486, acc=0.676, loss=66.729, backward_time=0.752, grad_norm=92.485, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.371e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 07:18:51,509 (trainer:732) INFO: 16epoch:train:2401-2500batch: iter_time=1.015e-04, forward_time=0.110, loss_ctc=75.890, loss_att=62.591, acc=0.690, loss=66.581, backward_time=0.754, grad_norm=93.157, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.367e-05, train_time=2.000
+[gpua003:0/64] 2023-07-06 07:18:52,779 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpua003:0/64] 2023-07-06 07:19:12,128 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 07:19:15,913 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 07:19:15,913 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpua003:0/64] 2023-07-06 07:19:15,919 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 07:24:48,132 (trainer:732) INFO: 16epoch:train:2501-2600batch: iter_time=1.279, forward_time=0.108, loss_ctc=71.726, loss_att=46.606, acc=0.697, loss=54.142, backward_time=0.767, grad_norm=105.412, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.364e-05, train_time=7.132
+[gpua003:0/64] 2023-07-06 07:26:27,825 (trainer:732) INFO: 16epoch:train:2601-2700batch: iter_time=1.075e-04, forward_time=0.108, loss_ctc=66.631, loss_att=51.594, acc=0.698, loss=56.105, backward_time=0.752, grad_norm=90.533, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.361e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 07:28:07,530 (trainer:732) INFO: 16epoch:train:2701-2800batch: iter_time=9.716e-05, forward_time=0.108, loss_ctc=70.966, loss_att=55.570, acc=0.680, loss=60.189, backward_time=0.752, grad_norm=86.466, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.358e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 07:29:47,312 (trainer:732) INFO: 16epoch:train:2801-2900batch: iter_time=1.072e-04, forward_time=0.108, loss_ctc=71.097, loss_att=53.450, acc=0.675, loss=58.744, backward_time=0.753, grad_norm=93.208, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.354e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 07:31:27,133 (trainer:732) INFO: 16epoch:train:2901-3000batch: iter_time=8.955e-05, forward_time=0.108, loss_ctc=63.821, loss_att=46.903, acc=0.701, loss=51.979, backward_time=0.752, grad_norm=74.418, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.351e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 07:33:06,795 (trainer:732) INFO: 16epoch:train:3001-3100batch: iter_time=9.687e-05, forward_time=0.108, loss_ctc=82.866, loss_att=60.932, acc=0.685, loss=67.512, backward_time=0.752, grad_norm=97.470, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.348e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 07:34:46,471 (trainer:732) INFO: 16epoch:train:3101-3200batch: iter_time=1.035e-04, forward_time=0.108, loss_ctc=82.047, loss_att=62.386, acc=0.681, loss=68.285, backward_time=0.752, grad_norm=94.406, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.344e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 07:36:26,118 (trainer:732) INFO: 16epoch:train:3201-3300batch: iter_time=9.042e-05, forward_time=0.108, loss_ctc=77.669, loss_att=65.484, acc=0.671, loss=69.140, backward_time=0.752, grad_norm=91.421, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.341e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 07:37:01,224 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpua003:0/64] 2023-07-06 07:37:20,629 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 07:37:24,141 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 07:37:24,141 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpua003:0/64] 2023-07-06 07:37:24,147 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 07:41:53,320 (trainer:732) INFO: 16epoch:train:3301-3400batch: iter_time=2.180, forward_time=0.108, loss_ctc=78.290, loss_att=55.680, acc=0.688, loss=62.463, backward_time=0.766, grad_norm=91.637, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.338e-05, train_time=6.544
+[gpua003:0/64] 2023-07-06 07:43:33,605 (trainer:732) INFO: 16epoch:train:3401-3500batch: iter_time=1.037e-04, forward_time=0.107, loss_ctc=68.344, loss_att=49.906, acc=0.712, loss=55.438, backward_time=0.754, grad_norm=81.948, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.112, optim0_lr0=9.335e-05, train_time=2.005
+[gpua003:0/64] 2023-07-06 07:45:13,347 (trainer:732) INFO: 16epoch:train:3501-3600batch: iter_time=9.421e-05, forward_time=0.108, loss_ctc=65.417, loss_att=50.435, acc=0.688, loss=54.930, backward_time=0.752, grad_norm=90.718, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.331e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 07:46:53,165 (trainer:732) INFO: 16epoch:train:3601-3700batch: iter_time=1.013e-04, forward_time=0.109, loss_ctc=70.603, loss_att=51.998, acc=0.686, loss=57.579, backward_time=0.751, grad_norm=85.036, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.328e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 07:48:33,178 (trainer:732) INFO: 16epoch:train:3701-3800batch: iter_time=1.098e-04, forward_time=0.109, loss_ctc=65.655, loss_att=51.080, acc=0.697, loss=55.453, backward_time=0.753, grad_norm=88.931, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.325e-05, train_time=2.000
+[gpua003:0/64] 2023-07-06 07:50:12,906 (trainer:732) INFO: 16epoch:train:3801-3900batch: iter_time=9.838e-05, forward_time=0.108, loss_ctc=74.297, loss_att=55.407, acc=0.684, loss=61.074, backward_time=0.751, grad_norm=82.975, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.322e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 07:51:56,731 (trainer:732) INFO: 16epoch:train:3901-4000batch: iter_time=9.921e-05, forward_time=0.109, loss_ctc=84.998, loss_att=61.759, acc=0.691, loss=68.731, backward_time=0.760, grad_norm=94.665, clip=100.000, loss_scale=2.252e+15, optim_step_time=0.113, optim0_lr0=9.318e-05, train_time=2.076
+[gpua003:0/64] 2023-07-06 07:53:42,883 (trainer:732) INFO: 16epoch:train:4001-4100batch: iter_time=1.143e-04, forward_time=0.109, loss_ctc=75.067, loss_att=62.875, acc=0.670, loss=66.533, backward_time=0.758, grad_norm=91.761, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.315e-05, train_time=2.123
+[gpua003:0/64] 2023-07-06 07:54:50,953 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpua003:0/64] 2023-07-06 07:55:10,233 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 07:55:13,721 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 07:55:13,721 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpua003:0/64] 2023-07-06 07:55:13,728 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 07:58:55,908 (trainer:732) INFO: 16epoch:train:4101-4200batch: iter_time=2.076, forward_time=0.141, loss_ctc=74.642, loss_att=55.845, acc=0.693, loss=61.484, backward_time=0.761, grad_norm=97.003, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.114, optim0_lr0=9.312e-05, train_time=6.260
+[gpua003:0/64] 2023-07-06 08:00:37,112 (trainer:732) INFO: 16epoch:train:4201-4300batch: iter_time=1.039e-04, forward_time=0.107, loss_ctc=67.813, loss_att=44.642, acc=0.709, loss=51.593, backward_time=0.754, grad_norm=88.191, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.111, optim0_lr0=9.309e-05, train_time=2.024
+[gpua003:0/64] 2023-07-06 08:02:16,904 (trainer:732) INFO: 16epoch:train:4301-4400batch: iter_time=1.034e-04, forward_time=0.108, loss_ctc=68.215, loss_att=52.130, acc=0.696, loss=56.955, backward_time=0.752, grad_norm=94.395, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.306e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 08:03:56,527 (trainer:732) INFO: 16epoch:train:4401-4500batch: iter_time=9.344e-05, forward_time=0.106, loss_ctc=69.906, loss_att=54.891, acc=0.683, loss=59.395, backward_time=0.751, grad_norm=83.039, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.302e-05, train_time=1.992
+[gpua003:0/64] 2023-07-06 08:05:37,831 (trainer:732) INFO: 16epoch:train:4501-4600batch: iter_time=1.004e-04, forward_time=0.108, loss_ctc=68.748, loss_att=51.394, acc=0.689, loss=56.600, backward_time=0.755, grad_norm=93.992, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.299e-05, train_time=2.026
+[gpua003:0/64] 2023-07-06 08:07:17,508 (trainer:732) INFO: 16epoch:train:4601-4700batch: iter_time=1.059e-04, forward_time=0.107, loss_ctc=65.443, loss_att=48.597, acc=0.702, loss=53.651, backward_time=0.752, grad_norm=111.503, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.296e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 08:08:57,106 (trainer:732) INFO: 16epoch:train:4701-4800batch: iter_time=1.018e-04, forward_time=0.107, loss_ctc=84.020, loss_att=64.596, acc=0.679, loss=70.423, backward_time=0.751, grad_norm=127.671, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.293e-05, train_time=1.992
+[gpua003:0/64] 2023-07-06 08:10:36,976 (trainer:732) INFO: 16epoch:train:4801-4900batch: iter_time=1.060e-04, forward_time=0.108, loss_ctc=76.921, loss_att=58.835, acc=0.684, loss=64.261, backward_time=0.752, grad_norm=114.068, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.289e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 08:12:16,723 (trainer:732) INFO: 16epoch:train:4901-5000batch: iter_time=1.049e-04, forward_time=0.109, loss_ctc=74.951, loss_att=61.324, acc=0.694, loss=65.412, backward_time=0.751, grad_norm=92.245, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.286e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 08:12:18,610 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpua003:0/64] 2023-07-06 08:12:37,768 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 08:12:41,328 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 08:12:41,328 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpua003:0/64] 2023-07-06 08:12:41,335 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 08:18:49,163 (trainer:732) INFO: 16epoch:train:5001-5100batch: iter_time=1.303, forward_time=0.116, loss_ctc=72.397, loss_att=47.284, acc=0.706, loss=54.818, backward_time=0.768, grad_norm=87.352, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.283e-05, train_time=7.848
+[gpua003:0/64] 2023-07-06 08:20:28,927 (trainer:732) INFO: 16epoch:train:5101-5200batch: iter_time=1.050e-04, forward_time=0.109, loss_ctc=69.999, loss_att=51.235, acc=0.705, loss=56.864, backward_time=0.750, grad_norm=90.609, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.280e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 08:22:09,028 (trainer:732) INFO: 16epoch:train:5201-5300batch: iter_time=8.618e-05, forward_time=0.109, loss_ctc=62.913, loss_att=49.293, acc=0.704, loss=53.379, backward_time=0.753, grad_norm=77.097, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.277e-05, train_time=2.002
+[gpua003:0/64] 2023-07-06 08:23:48,933 (trainer:732) INFO: 16epoch:train:5301-5400batch: iter_time=8.628e-05, forward_time=0.109, loss_ctc=72.795, loss_att=55.439, acc=0.694, loss=60.646, backward_time=0.752, grad_norm=88.288,
clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.273e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 08:25:28,752 (trainer:732) INFO: 16epoch:train:5401-5500batch: iter_time=9.338e-05, forward_time=0.109, loss_ctc=64.718, loss_att=47.439, acc=0.702, loss=52.623, backward_time=0.752, grad_norm=82.629, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.270e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 08:27:08,812 (trainer:732) INFO: 16epoch:train:5501-5600batch: iter_time=8.449e-05, forward_time=0.108, loss_ctc=78.843, loss_att=57.892, acc=0.696, loss=64.177, backward_time=0.754, grad_norm=92.290, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.267e-05, train_time=2.001 +[gpua003:0/64] 2023-07-06 08:28:55,560 (trainer:732) INFO: 16epoch:train:5601-5700batch: iter_time=8.973e-05, forward_time=0.107, loss_ctc=83.966, loss_att=62.853, acc=0.700, loss=69.187, backward_time=0.778, grad_norm=96.335, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.264e-05, train_time=2.135 +[gpua003:0/64] 2023-07-06 08:30:43,294 (trainer:732) INFO: 16epoch:train:5701-5800batch: iter_time=8.613e-05, forward_time=0.108, loss_ctc=69.962, loss_att=57.568, acc=0.690, loss=61.286, backward_time=0.762, grad_norm=89.847, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.261e-05, train_time=2.154 +[gpua003:0/64] 2023-07-06 08:31:17,439 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpua003:0/64] 2023-07-06 08:31:36,652 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 08:31:40,135 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 08:31:40,136 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpua003:0/64] 2023-07-06 08:31:40,142 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 08:35:45,247 (trainer:732) INFO: 16epoch:train:5801-5900batch: iter_time=1.276, forward_time=0.107, loss_ctc=80.360, loss_att=57.972, acc=0.693, loss=64.689, backward_time=0.765, grad_norm=106.653, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.258e-05, train_time=6.039 +[gpua003:0/64] 2023-07-06 08:37:25,600 (trainer:732) INFO: 16epoch:train:5901-6000batch: iter_time=9.151e-05, forward_time=0.107, loss_ctc=69.337, loss_att=47.625, acc=0.723, loss=54.139, backward_time=0.753, grad_norm=82.017, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.254e-05, train_time=2.007 +[gpua003:0/64] 2023-07-06 08:39:05,439 (trainer:732) INFO: 16epoch:train:6001-6100batch: iter_time=9.170e-05, forward_time=0.107, loss_ctc=65.149, loss_att=50.375, acc=0.696, loss=54.807, backward_time=0.752, grad_norm=92.310, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.251e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 08:40:45,317 (trainer:732) INFO: 16epoch:train:6101-6200batch: iter_time=1.040e-04, 
forward_time=0.107, loss_ctc=70.697, loss_att=51.205, acc=0.700, loss=57.052, backward_time=0.753, grad_norm=86.258, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.248e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 08:42:24,960 (trainer:732) INFO: 16epoch:train:6201-6300batch: iter_time=9.827e-05, forward_time=0.108, loss_ctc=64.757, loss_att=50.584, acc=0.702, loss=54.836, backward_time=0.751, grad_norm=79.471, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.245e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 08:44:04,631 (trainer:732) INFO: 16epoch:train:6301-6400batch: iter_time=1.131e-04, forward_time=0.109, loss_ctc=72.490, loss_att=54.788, acc=0.696, loss=60.098, backward_time=0.751, grad_norm=92.689, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.242e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 08:45:44,133 (trainer:732) INFO: 16epoch:train:6401-6500batch: iter_time=1.081e-04, forward_time=0.108, loss_ctc=85.155, loss_att=58.768, acc=0.706, loss=66.684, backward_time=0.749, grad_norm=98.779, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.239e-05, train_time=1.990 +[gpua003:0/64] 2023-07-06 08:47:23,850 (trainer:732) INFO: 16epoch:train:6501-6600batch: iter_time=1.083e-04, forward_time=0.109, loss_ctc=73.711, loss_att=60.604, acc=0.688, loss=64.536, backward_time=0.751, grad_norm=84.467, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.235e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 08:48:30,857 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpua003:0/64] 2023-07-06 08:48:50,019 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 08:48:53,511 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 08:48:53,512 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpua003:0/64] 2023-07-06 08:48:53,518 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 08:53:20,677 (trainer:732) INFO: 16epoch:train:6601-6700batch: iter_time=1.289, forward_time=0.108, loss_ctc=74.070, loss_att=54.401, acc=0.697, loss=60.302, backward_time=0.761, grad_norm=86.767, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.232e-05, train_time=7.136 +[gpua003:0/64] 2023-07-06 08:55:01,310 (trainer:732) INFO: 16epoch:train:6701-6800batch: iter_time=1.138e-04, forward_time=0.107, loss_ctc=71.082, loss_att=51.771, acc=0.709, loss=57.564, backward_time=0.755, grad_norm=90.309, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.229e-05, train_time=2.012 +[gpua003:0/64] 2023-07-06 08:56:41,532 (trainer:732) INFO: 16epoch:train:6801-6900batch: iter_time=1.140e-04, forward_time=0.108, loss_ctc=65.828, loss_att=50.568, acc=0.693, loss=55.146, backward_time=0.752, grad_norm=92.849, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.226e-05, 
train_time=2.004 +[gpua003:0/64] 2023-07-06 08:58:21,807 (trainer:732) INFO: 16epoch:train:6901-7000batch: iter_time=1.105e-04, forward_time=0.107, loss_ctc=69.044, loss_att=51.957, acc=0.681, loss=57.083, backward_time=0.751, grad_norm=85.863, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.223e-05, train_time=2.005 +[gpua003:0/64] 2023-07-06 09:00:01,383 (trainer:732) INFO: 16epoch:train:7001-7100batch: iter_time=1.136e-04, forward_time=0.108, loss_ctc=66.173, loss_att=51.594, acc=0.692, loss=55.968, backward_time=0.751, grad_norm=107.161, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.220e-05, train_time=1.991 +[gpua003:0/64] 2023-07-06 09:01:41,306 (trainer:732) INFO: 16epoch:train:7101-7200batch: iter_time=1.196e-04, forward_time=0.108, loss_ctc=68.998, loss_att=50.099, acc=0.697, loss=55.769, backward_time=0.751, grad_norm=84.671, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.217e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 09:03:21,067 (trainer:732) INFO: 16epoch:train:7201-7300batch: iter_time=1.122e-04, forward_time=0.108, loss_ctc=83.622, loss_att=62.831, acc=0.690, loss=69.069, backward_time=0.751, grad_norm=94.437, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.213e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 09:05:00,740 (trainer:732) INFO: 16epoch:train:7301-7400batch: iter_time=1.076e-04, forward_time=0.108, loss_ctc=76.063, loss_att=61.450, acc=0.677, loss=65.834, backward_time=0.751, grad_norm=89.194, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.210e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 09:06:40,479 (trainer:732) INFO: 16epoch:train:7401-7500batch: iter_time=1.026e-04, forward_time=0.108, loss_ctc=74.348, loss_att=63.331, acc=0.691, loss=66.636, backward_time=0.752, grad_norm=94.746, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.112, optim0_lr0=9.207e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 09:06:50,888 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpua003:0/64] 2023-07-06 09:07:09,872 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 09:07:13,343 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 09:07:13,343 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpua003:0/64] 2023-07-06 09:07:13,350 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 09:12:31,328 (trainer:732) INFO: 16epoch:train:7501-7600batch: iter_time=1.366, forward_time=0.109, loss_ctc=70.718, loss_att=46.675, acc=0.709, loss=53.888, backward_time=0.765, grad_norm=84.044, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.204e-05, train_time=7.017
+[gpua003:0/64] 2023-07-06 09:14:11,524 (trainer:732) INFO: 16epoch:train:7601-7700batch: iter_time=1.096e-04, forward_time=0.109, loss_ctc=71.556, loss_att=49.715, acc=0.711, loss=56.267, backward_time=0.752, grad_norm=84.595, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.201e-05, train_time=2.004
+[gpua003:0/64] 2023-07-06 09:15:55,971 (trainer:732) INFO: 16epoch:train:7701-7800batch: iter_time=1.048e-04, forward_time=0.110, loss_ctc=63.324, loss_att=50.467, acc=0.700, loss=54.324, backward_time=0.762, grad_norm=79.834, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.198e-05, train_time=2.089
+[gpua003:0/64] 2023-07-06 09:17:38,069 (trainer:732) INFO: 16epoch:train:7801-7900batch: iter_time=1.107e-04, forward_time=0.109, loss_ctc=74.072, loss_att=55.493, acc=0.692, loss=61.067, backward_time=0.764, grad_norm=89.346, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.195e-05, train_time=2.042
+[gpua003:0/64] 2023-07-06 09:19:24,407 (trainer:732) INFO: 16epoch:train:7901-8000batch: iter_time=1.124e-04, forward_time=0.109, loss_ctc=63.418, loss_att=47.702, acc=0.707, loss=52.417, backward_time=0.766, grad_norm=74.133, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.192e-05, train_time=2.127
+[gpua003:0/64] 2023-07-06 09:21:04,320 (trainer:732) INFO: 16epoch:train:8001-8100batch: iter_time=1.101e-04, forward_time=0.109, loss_ctc=80.173, loss_att=56.871, acc=0.698, loss=63.862, backward_time=0.751, grad_norm=90.058, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.189e-05, train_time=1.998
+[gpua003:0/64] 2023-07-06 09:22:51,435 (trainer:732) INFO: 16epoch:train:8101-8200batch: iter_time=1.128e-04, forward_time=0.109, loss_ctc=82.878, loss_att=61.648, acc=0.703, loss=68.017, backward_time=0.757, grad_norm=96.301, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.185e-05, train_time=2.142
+[gpua003:0/64] 2023-07-06 09:24:32,016 (trainer:732) INFO: 16epoch:train:8201-8300batch: iter_time=1.174e-04, forward_time=0.109, loss_ctc=70.061, loss_att=57.659, acc=0.693, loss=61.380, backward_time=0.750, grad_norm=90.693, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.182e-05, train_time=2.011
+[gpua003:0/64] 2023-07-06 09:25:08,632 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpua003:0/64] 2023-07-06 09:25:27,808 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 09:25:31,292 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 09:25:31,292 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpua003:0/64] 2023-07-06 09:25:31,298 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 09:30:43,049 (trainer:732) INFO: 16epoch:train:8301-8400batch: iter_time=1.323, forward_time=0.109, loss_ctc=78.228, loss_att=55.459, acc=0.696, loss=62.290, backward_time=0.766, grad_norm=102.805, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.179e-05, train_time=7.420
+[gpua003:0/64] 2023-07-06 09:32:24,140 (trainer:732) INFO: 16epoch:train:8401-8500batch: iter_time=1.051e-04, forward_time=0.107, loss_ctc=65.900, loss_att=49.152, acc=0.721, loss=54.176, backward_time=0.753, grad_norm=86.699, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.176e-05, train_time=2.022
+[gpua003:0/64] 2023-07-06 09:34:03,912 (trainer:732) INFO: 16epoch:train:8501-8600batch: iter_time=9.517e-05, forward_time=0.107, loss_ctc=64.248, loss_att=49.488, acc=0.696, loss=53.916, backward_time=0.752, grad_norm=92.458, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.173e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 09:35:44,278 (trainer:732) INFO: 16epoch:train:8601-8700batch: iter_time=1.134e-04, forward_time=0.109, loss_ctc=68.813, loss_att=50.173, acc=0.693, loss=55.765, backward_time=0.753, grad_norm=79.140, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.170e-05, train_time=2.007
+[gpua003:0/64] 2023-07-06 09:37:24,339 (trainer:732) INFO: 16epoch:train:8701-8800batch: iter_time=1.014e-04, forward_time=0.107, loss_ctc=63.927, loss_att=49.180, acc=0.703, loss=53.604, backward_time=0.752, grad_norm=74.079, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.167e-05, train_time=2.001
+[gpua003:0/64] 2023-07-06 09:39:04,178 (trainer:732) INFO: 16epoch:train:8801-8900batch: iter_time=9.886e-05, forward_time=0.107, loss_ctc=71.856, loss_att=54.471, acc=0.690, loss=59.686, backward_time=0.751, grad_norm=84.227, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.164e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 09:40:44,106 (trainer:732) INFO: 16epoch:train:8901-9000batch: iter_time=9.990e-05, forward_time=0.108, loss_ctc=83.183, loss_att=60.219, acc=0.697, loss=67.108, backward_time=0.752, grad_norm=105.843, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.161e-05, train_time=1.998
+[gpua003:0/64] 2023-07-06 09:42:23,729 (trainer:732) INFO: 16epoch:train:9001-9100batch: iter_time=1.063e-04, forward_time=0.107, loss_ctc=73.879, loss_att=62.193, acc=0.675, loss=65.699, backward_time=0.750, grad_norm=87.852, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.158e-05, train_time=1.992
+[gpua003:0/64] 2023-07-06 09:43:30,780 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpua003:0/64] 2023-07-06 09:43:50,013 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 09:43:53,960 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 09:43:53,960 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpua003:0/64] 2023-07-06 09:43:53,967 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 09:47:02,373 (trainer:732) INFO: 16epoch:train:9101-9200batch: iter_time=1.335, forward_time=0.108, loss_ctc=73.193, loss_att=55.038, acc=0.695, loss=60.485, backward_time=0.764, grad_norm=100.991, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.155e-05, train_time=5.573
+[gpua003:0/64] 2023-07-06 09:48:42,805 (trainer:732) INFO: 16epoch:train:9201-9300batch: iter_time=1.082e-04, forward_time=0.108, loss_ctc=70.916, loss_att=49.501, acc=0.717, loss=55.926, backward_time=0.755, grad_norm=111.947, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.152e-05, train_time=2.008
+[gpua003:0/64] 2023-07-06 09:50:23,695 (trainer:732) INFO: 16epoch:train:9301-9400batch: iter_time=1.008e-04, forward_time=0.109, loss_ctc=65.130, loss_att=48.335, acc=0.700, loss=53.374, backward_time=0.753, grad_norm=76.035, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.148e-05, train_time=2.018
+[gpua003:0/64] 2023-07-06 09:52:03,613 (trainer:732) INFO: 16epoch:train:9401-9500batch: iter_time=1.046e-04, forward_time=0.108, loss_ctc=69.151, loss_att=50.398, acc=0.699, loss=56.024, backward_time=0.752, grad_norm=82.318, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.111, optim0_lr0=9.145e-05, train_time=1.998
+[gpua003:0/64] 2023-07-06 09:53:43,866 (trainer:732) INFO: 16epoch:train:9501-9600batch: iter_time=9.826e-05, forward_time=0.108, loss_ctc=64.096, loss_att=50.574, acc=0.700, loss=54.631, backward_time=0.752, grad_norm=76.082, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.142e-05, train_time=2.005
+[gpua003:0/64] 2023-07-06 09:55:23,809 (trainer:732) INFO: 16epoch:train:9601-9700batch: iter_time=1.055e-04, forward_time=0.107, loss_ctc=68.502, loss_att=50.633, acc=0.705, loss=55.994, backward_time=0.752, grad_norm=90.453, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.139e-05, train_time=1.999
+[gpua003:0/64] 2023-07-06 09:57:03,795 (trainer:732) INFO: 16epoch:train:9701-9800batch: iter_time=1.007e-04, forward_time=0.108, loss_ctc=83.554, loss_att=60.263, acc=0.706, loss=67.250, backward_time=0.752, grad_norm=121.058, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.136e-05, train_time=1.999
+[gpua003:0/64] 2023-07-06 09:58:43,540 (trainer:732) INFO: 16epoch:train:9801-9900batch: iter_time=1.058e-04, forward_time=0.107, loss_ctc=75.767, loss_att=61.630, acc=0.686, loss=65.871, backward_time=0.752, grad_norm=99.766, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.133e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 10:00:23,449 (trainer:732) INFO: 16epoch:train:9901-10000batch: iter_time=1.125e-04, forward_time=0.109, loss_ctc=73.492, loss_att=62.177, acc=0.702, loss=65.572, backward_time=0.752, grad_norm=87.104, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.130e-05, train_time=1.998
+[gpua003:0/64] 2023-07-06 10:13:27,455 (trainer:338) INFO: 16epoch results: [train] iter_time=0.188, forward_time=0.109, loss_ctc=72.648, loss_att=54.689, acc=0.694, loss=60.077, backward_time=0.755, grad_norm=91.310, clip=100.000, loss_scale=4.504e+15, optim_step_time=0.113, optim0_lr0=9.286e-05, train_time=2.587, time=3 hours, 35 minutes and 51.24 seconds, total_count=130000, gpu_max_cached_mem_GB=37.775, [valid] loss_ctc=53.067, cer_ctc=0.290, loss_att=44.062, acc=0.664, cer=0.358, wer=0.991, loss=46.763, time=6 minutes and 19.52 seconds, total_count=13662, gpu_max_cached_mem_GB=37.775, [att_plot] time=6 minutes and 30.6 seconds, total_count=0, gpu_max_cached_mem_GB=37.775
+[gpua003:0/64] 2023-07-06 10:13:43,422 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count
+[gpua003:0/64] 2023-07-06 10:13:43,465 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/11epoch.pth
+[gpua003:0/64] 2023-07-06 10:13:43,465 (trainer:272) INFO: 17/100epoch started. Estimated time to finish: 1 week, 6 days and 10 hours
+[gpua003:0/64] 2023-07-06 10:13:43,501 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpua003:0/64] 2023-07-06 10:14:02,307 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 10:14:05,803 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 10:14:05,804 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpua003:0/64] 2023-07-06 10:14:05,819 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 10:20:06,308 (trainer:732) INFO: 17epoch:train:1-100batch: iter_time=2.583, forward_time=0.213, loss_ctc=70.673, loss_att=58.961, acc=0.684, loss=62.475, backward_time=0.820, grad_norm=95.879, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.123, optim0_lr0=9.127e-05, train_time=7.656
+[gpua003:0/64] 2023-07-06 10:21:47,983 (trainer:732) INFO: 17epoch:train:101-200batch: iter_time=1.078e-04, forward_time=0.110, loss_ctc=72.857, loss_att=65.701, acc=0.683, loss=67.848, backward_time=0.753, grad_norm=87.783, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.124e-05, train_time=2.034
+[gpua003:0/64] 2023-07-06 10:23:43,718 (trainer:732) INFO: 17epoch:train:201-300batch: iter_time=3.269e-04, forward_time=0.202, loss_ctc=74.298, loss_att=62.216, acc=0.684, loss=65.841, backward_time=0.776, grad_norm=85.905, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.119, optim0_lr0=9.121e-05, train_time=2.314
+[gpua003:0/64] 2023-07-06 10:25:26,949 (trainer:732) INFO: 17epoch:train:301-400batch: iter_time=9.790e-05, forward_time=0.108, loss_ctc=72.162, loss_att=54.531, acc=0.685, loss=59.820, backward_time=0.756, grad_norm=86.861, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.118e-05, train_time=2.064
+[gpua003:0/64] 2023-07-06 10:27:13,540 (trainer:732) INFO: 17epoch:train:401-500batch: iter_time=1.134e-04, forward_time=0.120, loss_ctc=74.989, loss_att=52.743, acc=0.701, loss=59.416, backward_time=0.763, grad_norm=88.929, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.117, optim0_lr0=9.115e-05, train_time=2.132
+[gpua003:0/64] 2023-07-06 10:29:07,314 (trainer:732) INFO: 17epoch:train:501-600batch: iter_time=0.006, forward_time=0.165, loss_ctc=74.139, loss_att=57.626, acc=0.683, loss=62.580, backward_time=0.774, grad_norm=89.735, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.116, optim0_lr0=9.112e-05, train_time=2.275
+[gpua003:0/64] 2023-07-06 10:31:07,776 (trainer:732) INFO: 17epoch:train:601-700batch: iter_time=9.955e-05, forward_time=0.110, loss_ctc=75.682, loss_att=59.754, acc=0.684, loss=64.533, backward_time=0.796, grad_norm=95.065, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.109e-05, train_time=2.409
+[gpua003:0/64] 2023-07-06 10:33:08,425 (trainer:732) INFO: 17epoch:train:701-800batch: iter_time=0.001, forward_time=0.197, loss_ctc=73.468, loss_att=56.545, acc=0.693, loss=61.622, backward_time=0.799, grad_norm=89.781, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.117, optim0_lr0=9.106e-05, train_time=2.411
+[gpua003:0/64] 2023-07-06 10:33:52,355 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpua003:0/64] 2023-07-06 10:34:11,454 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 10:34:14,921 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 10:34:14,921 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpua003:0/64] 2023-07-06 10:34:14,928 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 10:38:42,065 (trainer:732) INFO: 17epoch:train:801-900batch: iter_time=1.652, forward_time=0.108, loss_ctc=78.264, loss_att=59.917, acc=0.689, loss=65.421, backward_time=0.771, grad_norm=105.575, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.103e-05, train_time=6.674
+[gpua003:0/64] 2023-07-06 10:40:21,942 (trainer:732) INFO: 17epoch:train:901-1000batch: iter_time=9.108e-05, forward_time=0.107, loss_ctc=68.668, loss_att=64.753, acc=0.669, loss=65.927, backward_time=0.750, grad_norm=89.386, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.100e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 10:42:01,601 (trainer:732) INFO: 17epoch:train:1001-1100batch: iter_time=9.405e-05, forward_time=0.107, loss_ctc=73.564, loss_att=60.516, acc=0.685, loss=64.430, backward_time=0.750, grad_norm=93.150, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.097e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 10:43:41,772 (trainer:732) INFO: 17epoch:train:1101-1200batch: iter_time=9.308e-05, forward_time=0.107, loss_ctc=70.639, loss_att=56.045, acc=0.686, loss=60.423, backward_time=0.751, grad_norm=87.677, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.094e-05, train_time=2.003
+[gpua003:0/64] 2023-07-06 10:45:21,649 (trainer:732) INFO: 17epoch:train:1201-1300batch: iter_time=9.487e-05, forward_time=0.107, loss_ctc=71.051, loss_att=55.501, acc=0.686, loss=60.166, backward_time=0.752, grad_norm=91.712, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.112, optim0_lr0=9.091e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 10:47:01,483 (trainer:732) INFO: 17epoch:train:1301-1400batch: iter_time=9.825e-05, forward_time=0.108, loss_ctc=78.942, loss_att=56.523, acc=0.684, loss=63.249, backward_time=0.752, grad_norm=90.368, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.088e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 10:48:41,812 (trainer:732) INFO: 17epoch:train:1401-1500batch: iter_time=1.015e-04, forward_time=0.114, loss_ctc=76.055, loss_att=56.659, acc=0.684, loss=62.478, backward_time=0.752, grad_norm=88.888, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.085e-05, train_time=2.006
+[gpua003:0/64] 2023-07-06 10:50:21,503 (trainer:732) INFO: 17epoch:train:1501-1600batch: iter_time=1.011e-04, forward_time=0.106, loss_ctc=72.092, loss_att=59.329, acc=0.681, loss=63.157, backward_time=0.751, grad_norm=83.857, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.082e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 10:51:28,539 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpua003:0/64] 2023-07-06 10:51:47,761 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 10:51:51,243 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 10:51:51,244 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpua003:0/64] 2023-07-06 10:51:51,250 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 10:55:23,750 (trainer:732) INFO: 17epoch:train:1601-1700batch: iter_time=1.339, forward_time=0.136, loss_ctc=75.727, loss_att=56.568, acc=0.694, loss=62.316, backward_time=0.762, grad_norm=98.984, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.114, optim0_lr0=9.079e-05, train_time=6.043
+[gpua003:0/64] 2023-07-06 10:57:04,140 (trainer:732) INFO: 17epoch:train:1701-1800batch: iter_time=1.070e-04, forward_time=0.110, loss_ctc=65.293, loss_att=58.713, acc=0.683, loss=60.687, backward_time=0.755, grad_norm=89.446, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.076e-05, train_time=2.009
+[gpua003:0/64] 2023-07-06 10:58:44,084 (trainer:732) INFO: 17epoch:train:1801-1900batch: iter_time=1.046e-04, forward_time=0.108, loss_ctc=74.486, loss_att=67.076, acc=0.685, loss=69.299, backward_time=0.753, grad_norm=99.307, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.073e-05, train_time=1.999
+[gpua003:0/64] 2023-07-06 11:00:24,124 (trainer:732) INFO: 17epoch:train:1901-2000batch: iter_time=1.090e-04, forward_time=0.108, loss_ctc=74.341, loss_att=60.362, acc=0.692, loss=64.556, backward_time=0.753, grad_norm=87.026, clip=100.000, loss_scale=9.007e+15, optim_step_time=0.113, optim0_lr0=9.070e-05, train_time=2.001
+[gpua003:0/64] 2023-07-06 11:02:04,044 (trainer:732) INFO: 17epoch:train:2001-2100batch: iter_time=1.014e-04, forward_time=0.108, loss_ctc=66.178, loss_att=49.767, acc=0.697, loss=54.690, backward_time=0.753, grad_norm=86.555, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=9.067e-05, train_time=1.998
+[gpua003:0/64] 2023-07-06 11:03:43,843 (trainer:732) INFO: 17epoch:train:2101-2200batch: iter_time=9.901e-05, forward_time=0.109, loss_ctc=77.231, loss_att=58.176, acc=0.697, loss=63.892, backward_time=0.752, grad_norm=93.954, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=9.064e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 11:05:23,555 (trainer:732) INFO: 17epoch:train:2201-2300batch: iter_time=9.750e-05, forward_time=0.108, loss_ctc=75.802, loss_att=57.441, acc=0.693, loss=62.949, backward_time=0.751, grad_norm=89.231, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=9.061e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 11:07:03,243 (trainer:732) INFO: 17epoch:train:2301-2400batch: iter_time=1.062e-04, forward_time=0.108, loss_ctc=70.913, loss_att=52.438, acc=0.689, loss=57.980, backward_time=0.751, grad_norm=110.095, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=9.058e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 11:09:01,745 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpua003:0/64] 2023-07-06 11:09:21,136 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 11:09:24,685 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 11:09:24,685 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpua003:0/64] 2023-07-06 11:09:24,692 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 11:12:11,094 (trainer:732) INFO: 17epoch:train:2401-2500batch: iter_time=1.565, forward_time=0.109, loss_ctc=75.267, loss_att=58.496, acc=0.696, loss=63.527, backward_time=0.755, grad_norm=104.336, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=9.055e-05, train_time=6.157
+[gpua003:0/64] 2023-07-06 11:13:53,368 (trainer:732) INFO: 17epoch:train:2501-2600batch: iter_time=1.102e-04, forward_time=0.108, loss_ctc=70.363, loss_att=57.552, acc=0.686, loss=61.395, backward_time=0.759, grad_norm=92.372, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.052e-05, train_time=2.045
+[gpua003:0/64] 2023-07-06 11:15:33,128 (trainer:732) INFO: 17epoch:train:2601-2700batch: iter_time=1.054e-04, forward_time=0.108, loss_ctc=71.407, loss_att=65.980, acc=0.682, loss=67.608, backward_time=0.750, grad_norm=88.759, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.049e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 11:17:13,072 (trainer:732) INFO: 17epoch:train:2701-2800batch: iter_time=1.124e-04, forward_time=0.108, loss_ctc=73.031, loss_att=60.033, acc=0.686, loss=63.933, backward_time=0.751, grad_norm=87.774, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.046e-05, train_time=1.999
+[gpua003:0/64] 2023-07-06 11:18:52,468 (trainer:732) INFO: 17epoch:train:2801-2900batch: iter_time=1.132e-04, forward_time=0.106, loss_ctc=70.091, loss_att=54.398, acc=0.683, loss=59.106, backward_time=0.749, grad_norm=91.603, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.043e-05, train_time=1.988
+[gpua003:0/64] 2023-07-06 11:20:32,129 (trainer:732) INFO: 17epoch:train:2901-3000batch: iter_time=1.165e-04, forward_time=0.108, loss_ctc=74.247, loss_att=52.240, acc=0.696, loss=58.842, backward_time=0.750, grad_norm=94.573, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.040e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 11:22:11,683 (trainer:732) INFO: 17epoch:train:3001-3100batch: iter_time=1.201e-04, forward_time=0.107, loss_ctc=73.832, loss_att=58.188, acc=0.682, loss=62.881, backward_time=0.749, grad_norm=91.783, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.037e-05, train_time=1.991
+[gpua003:0/64] 2023-07-06 11:23:51,407 (trainer:732) INFO: 17epoch:train:3101-3200batch: iter_time=1.178e-04, forward_time=0.108, loss_ctc=72.306, loss_att=57.425, acc=0.683, loss=61.889, backward_time=0.751, grad_norm=96.037, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.034e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 11:25:30,906 (trainer:732) INFO: 17epoch:train:3201-3300batch: iter_time=1.122e-04, forward_time=0.107, loss_ctc=73.002, loss_att=56.253, acc=0.694, loss=61.278, backward_time=0.750, grad_norm=94.698, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.031e-05, train_time=1.990
+[gpua003:0/64] 2023-07-06 11:26:04,198 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpua003:0/64] 2023-07-06 11:26:23,282 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 11:26:26,814 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 11:26:26,814 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpua003:0/64] 2023-07-06 11:26:26,821 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 11:32:52,064 (trainer:732) INFO: 17epoch:train:3301-3400batch: iter_time=1.297, forward_time=0.107, loss_ctc=74.660, loss_att=57.798, acc=0.692, loss=62.857, backward_time=0.766, grad_norm=124.515, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.028e-05, train_time=8.823
+[gpua003:0/64] 2023-07-06 11:34:32,228 (trainer:732) INFO: 17epoch:train:3401-3500batch: iter_time=1.056e-04, forward_time=0.108, loss_ctc=67.842, loss_att=63.769, acc=0.687, loss=64.991, backward_time=0.753, grad_norm=85.874, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.025e-05, train_time=2.003
+[gpua003:0/64] 2023-07-06 11:36:12,063 (trainer:732) INFO: 17epoch:train:3501-3600batch: iter_time=9.523e-05, forward_time=0.108, loss_ctc=71.756, loss_att=59.431, acc=0.693, loss=63.129, backward_time=0.752, grad_norm=81.853, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=9.022e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 11:37:52,081 (trainer:732) INFO: 17epoch:train:3601-3700batch: iter_time=8.824e-05, forward_time=0.108, loss_ctc=71.855, loss_att=55.535, acc=0.697, loss=60.431, backward_time=0.752, grad_norm=82.300, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=9.020e-05, train_time=2.000
+[gpua003:0/64] 2023-07-06 11:39:37,163 (trainer:732) INFO: 17epoch:train:3701-3800batch: iter_time=9.169e-05, forward_time=0.108, loss_ctc=69.667, loss_att=54.813, acc=0.695, loss=59.269, backward_time=0.759, grad_norm=90.155, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.017e-05, train_time=2.101
+[gpua003:0/64] 2023-07-06 11:41:16,991 (trainer:732) INFO: 17epoch:train:3801-3900batch: iter_time=9.824e-05, forward_time=0.107, loss_ctc=77.697, loss_att=55.817, acc=0.700, loss=62.381, backward_time=0.751, grad_norm=89.894, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.014e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 11:42:56,825 (trainer:732) INFO: 17epoch:train:3901-4000batch: iter_time=9.875e-05, forward_time=0.107, loss_ctc=73.227, loss_att=54.909, acc=0.698, loss=60.404, backward_time=0.751, grad_norm=98.795, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.011e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 11:44:38,362 (trainer:732) INFO: 17epoch:train:4001-4100batch: iter_time=1.006e-04, forward_time=0.107, loss_ctc=72.543, loss_att=58.175, acc=0.689, loss=62.485, backward_time=0.755, grad_norm=89.796, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.008e-05, train_time=2.031
+[gpua003:0/64] 2023-07-06 11:45:44,663 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpua003:0/64] 2023-07-06 11:46:03,824 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 11:46:07,363 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 11:46:07,363 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpua003:0/64] 2023-07-06 11:46:07,369 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 11:51:11,190 (trainer:732) INFO: 17epoch:train:4101-4200batch: iter_time=1.302, forward_time=0.107, loss_ctc=75.093, loss_att=55.212, acc=0.696, loss=61.176, backward_time=0.773, grad_norm=88.273, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.005e-05, train_time=7.856
+[gpua003:0/64] 2023-07-06 11:52:51,935 (trainer:732) INFO: 17epoch:train:4201-4300batch: iter_time=9.852e-05, forward_time=0.108, loss_ctc=66.394, loss_att=59.086, acc=0.681, loss=61.279, backward_time=0.754, grad_norm=84.241, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=9.002e-05, train_time=2.015
+[gpua003:0/64] 2023-07-06 11:54:31,640 (trainer:732) INFO: 17epoch:train:4301-4400batch: iter_time=9.413e-05, forward_time=0.107, loss_ctc=71.953, loss_att=65.497, acc=0.672, loss=67.434, backward_time=0.751, grad_norm=102.244, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=8.999e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 11:56:11,417 (trainer:732) INFO: 17epoch:train:4401-4500batch: iter_time=9.684e-05, forward_time=0.107, loss_ctc=72.922, loss_att=58.511, acc=0.684, loss=62.834, backward_time=0.751, grad_norm=97.141, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=8.996e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 11:57:51,231 (trainer:732) INFO: 17epoch:train:4501-4600batch: iter_time=1.040e-04, forward_time=0.107, loss_ctc=70.213, loss_att=52.171, acc=0.696, loss=57.583, backward_time=0.751, grad_norm=81.295, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=8.993e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 11:59:30,948 (trainer:732) INFO: 17epoch:train:4601-4700batch: iter_time=1.024e-04, forward_time=0.107, loss_ctc=80.215, loss_att=60.929, acc=0.678, loss=66.715, backward_time=0.751, grad_norm=101.501, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=8.990e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 12:01:10,656 (trainer:732) INFO: 17epoch:train:4701-4800batch: iter_time=1.032e-04, forward_time=0.107, loss_ctc=72.860, loss_att=54.463, acc=0.685, loss=59.982, backward_time=0.750, grad_norm=86.781, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=8.987e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 12:02:50,263 (trainer:732) INFO: 17epoch:train:4801-4900batch: iter_time=1.006e-04, forward_time=0.107, loss_ctc=72.023, loss_att=55.370, acc=0.691, loss=60.366, backward_time=0.751, grad_norm=80.330, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=8.985e-05, train_time=1.992
+[gpua003:0/64] 2023-07-06 12:04:30,172 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpua003:0/64] 2023-07-06 12:04:49,660 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 12:04:53,209 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 12:04:53,209 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpua003:0/64] 2023-07-06 12:04:53,232 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 12:09:03,264 (trainer:732) INFO: 17epoch:train:4901-5000batch: iter_time=2.218, forward_time=0.107, loss_ctc=69.690, loss_att=56.365, acc=0.679, loss=60.363, backward_time=0.760, grad_norm=86.852, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.112, optim0_lr0=8.982e-05, train_time=7.460
+[gpua003:0/64] 2023-07-06 12:10:45,243 (trainer:732) INFO: 17epoch:train:5001-5100batch: iter_time=9.156e-05, forward_time=0.108, loss_ctc=69.222, loss_att=56.473, acc=0.696, loss=60.298, backward_time=0.760, grad_norm=92.071, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=8.979e-05, train_time=2.039
+[gpua003:0/64] 2023-07-06 12:12:25,764 (trainer:732) INFO: 17epoch:train:5101-5200batch: iter_time=1.022e-04, forward_time=0.108, loss_ctc=70.202, loss_att=64.051, acc=0.686, loss=65.896, backward_time=0.754, grad_norm=83.538, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=8.976e-05, train_time=2.010
+[gpua003:0/64] 2023-07-06 12:14:28,497 (trainer:732) INFO: 17epoch:train:5201-5300batch: iter_time=1.115e-04, forward_time=0.108, loss_ctc=72.332, loss_att=60.244, acc=0.687, loss=63.870, backward_time=0.813, grad_norm=91.528, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=8.973e-05, train_time=2.454
+[gpua003:0/64] 2023-07-06 12:16:08,436 (trainer:732) INFO: 17epoch:train:5301-5400batch: iter_time=8.814e-05, forward_time=0.109, loss_ctc=69.588, loss_att=52.565, acc=0.687, loss=57.672, backward_time=0.753, grad_norm=77.684, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.114, optim0_lr0=8.970e-05, train_time=1.999
+[gpua003:0/64] 2023-07-06 12:17:52,763 (trainer:732) INFO: 17epoch:train:5401-5500batch: iter_time=8.935e-05, forward_time=0.109, loss_ctc=73.912, loss_att=52.422, acc=0.696, loss=58.869, backward_time=0.776, grad_norm=84.516, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.114, optim0_lr0=8.967e-05, train_time=2.086
+[gpua003:0/64] 2023-07-06 12:19:32,754 (trainer:732) INFO: 17epoch:train:5501-5600batch: iter_time=8.392e-05, forward_time=0.108, loss_ctc=73.547, loss_att=57.855, acc=0.682, loss=62.562, backward_time=0.753, grad_norm=96.965, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.114, optim0_lr0=8.964e-05, train_time=2.000
+[gpua003:0/64] 2023-07-06 12:21:12,375 (trainer:732) INFO: 17epoch:train:5601-5700batch: iter_time=1.111e-04, forward_time=0.108, loss_ctc=71.195, loss_att=56.412, acc=0.687, loss=60.847, backward_time=0.751, grad_norm=116.013, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=8.961e-05, train_time=1.992
+[gpua003:0/64] 2023-07-06 12:22:52,067 (trainer:732) INFO: 17epoch:train:5701-5800batch: iter_time=8.850e-05, forward_time=0.108, loss_ctc=71.813, loss_att=55.666, acc=0.695, loss=60.510, backward_time=0.751, grad_norm=115.475, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.113, optim0_lr0=8.959e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 12:23:25,320 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpua003:0/64] 2023-07-06 12:23:44,500 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 12:23:48,062 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 12:23:48,062 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpua003:0/64] 2023-07-06 12:23:48,068 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 12:29:41,148 (trainer:732) INFO: 17epoch:train:5801-5900batch: iter_time=1.954, forward_time=0.110, loss_ctc=75.608, loss_att=57.604, acc=0.698, loss=63.005, backward_time=0.765, grad_norm=95.989, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.114, optim0_lr0=8.956e-05, train_time=8.181
+[gpua003:0/64] 2023-07-06 12:31:32,248 (trainer:732) INFO: 17epoch:train:5901-6000batch: iter_time=1.021e-04, forward_time=0.110, loss_ctc=65.942, loss_att=62.877, acc=0.691, loss=63.796, backward_time=0.780, grad_norm=90.009, clip=100.000, loss_scale=1.801e+16, optim_step_time=0.114, optim0_lr0=8.953e-05, train_time=2.222
+[gpua003:0/64] 2023-07-06 12:33:12,332 (trainer:732) INFO: 17epoch:train:6001-6100batch: iter_time=1.069e-04, forward_time=0.110, loss_ctc=71.278, loss_att=58.934, acc=0.693, loss=62.637, backward_time=0.753, grad_norm=83.756, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.950e-05, train_time=2.001
+[gpua003:0/64] 2023-07-06 12:34:52,381 (trainer:732) INFO: 17epoch:train:6101-6200batch: iter_time=1.058e-04, forward_time=0.110, loss_ctc=70.544, loss_att=53.727, acc=0.704, loss=58.772, backward_time=0.753, grad_norm=79.862, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.947e-05, train_time=2.001
+[gpua003:0/64] 2023-07-06 12:36:34,193 (trainer:732) INFO: 17epoch:train:6201-6300batch: iter_time=1.036e-04, forward_time=0.110, loss_ctc=69.284, loss_att=54.871, acc=0.698, loss=59.195, backward_time=0.759, grad_norm=80.439, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.944e-05, train_time=2.036
+[gpua003:0/64] 2023-07-06 12:38:16,533 (trainer:732) INFO: 17epoch:train:6301-6400batch: iter_time=1.054e-04, forward_time=0.110, loss_ctc=76.122, loss_att=54.596, acc=0.703, loss=61.054, backward_time=0.756, grad_norm=92.990, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.941e-05, train_time=2.047
+[gpua003:0/64] 2023-07-06 12:40:03,767 (trainer:732) INFO: 17epoch:train:6401-6500batch: iter_time=1.067e-04, forward_time=0.109, loss_ctc=72.416, loss_att=54.767, acc=0.698, loss=60.062, backward_time=0.760, grad_norm=82.798, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.938e-05, train_time=2.144
+[gpua003:0/64] 2023-07-06 12:41:43,845 (trainer:732) INFO: 17epoch:train:6501-6600batch: iter_time=1.023e-04, forward_time=0.109, loss_ctc=69.309, loss_att=56.242, acc=0.693, loss=60.162, backward_time=0.752, grad_norm=84.956, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.936e-05, train_time=2.001
+[gpua003:0/64] 2023-07-06 12:42:53,074 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpua003:0/64] 2023-07-06 12:43:12,502 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 12:43:16,051 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 12:43:16,051 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpua003:0/64] 2023-07-06 12:43:16,058 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 12:48:47,376 (trainer:732) INFO: 17epoch:train:6601-6700batch: iter_time=1.326, forward_time=0.109, loss_ctc=73.679, loss_att=54.146, acc=0.702, loss=60.006, backward_time=0.774, grad_norm=87.071, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.933e-05, train_time=8.470
+[gpua003:0/64] 2023-07-06 12:50:28,471 (trainer:732) INFO: 17epoch:train:6701-6800batch: iter_time=9.504e-05, forward_time=0.109, loss_ctc=66.009, loss_att=56.529, acc=0.690, loss=59.373, backward_time=0.754, grad_norm=86.611, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.930e-05, train_time=2.022
+[gpua003:0/64] 2023-07-06 12:52:08,406 (trainer:732) INFO: 17epoch:train:6801-6900batch: iter_time=9.435e-05, forward_time=0.109, loss_ctc=71.563, loss_att=64.197, acc=0.675, loss=66.407, backward_time=0.752, grad_norm=91.386, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.927e-05, train_time=1.998
+[gpua003:0/64] 2023-07-06 12:53:48,230 (trainer:732) INFO: 17epoch:train:6901-7000batch: iter_time=1.083e-04, forward_time=0.108, loss_ctc=70.739, loss_att=56.404, acc=0.690, loss=60.704, backward_time=0.752, grad_norm=77.527, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.924e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 12:55:28,052 (trainer:732) INFO: 17epoch:train:7001-7100batch: iter_time=1.054e-04, forward_time=0.109, loss_ctc=67.590, loss_att=51.543, acc=0.698, loss=56.357, backward_time=0.752, grad_norm=86.798, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.921e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 12:57:07,643 (trainer:732) INFO: 17epoch:train:7101-7200batch: iter_time=1.132e-04, forward_time=0.108, loss_ctc=78.963, loss_att=61.659, acc=0.674, loss=66.850, backward_time=0.751, grad_norm=88.590, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.919e-05, train_time=1.992
+[gpua003:0/64] 2023-07-06 12:58:47,336 (trainer:732) INFO: 17epoch:train:7201-7300batch: iter_time=1.007e-04, forward_time=0.109, loss_ctc=71.829, loss_att=53.054, acc=0.688, loss=58.686, backward_time=0.752, grad_norm=99.178, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.916e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 13:00:27,154 (trainer:732) INFO: 17epoch:train:7301-7400batch: iter_time=9.624e-05, forward_time=0.110, loss_ctc=70.807, loss_att=54.983, acc=0.694, loss=59.730, backward_time=0.752, grad_norm=96.232, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.913e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 13:02:07,410 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpua003:0/64] 2023-07-06 13:02:26,581 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 13:02:30,447 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 13:02:30,447 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpua003:0/64] 2023-07-06 13:02:30,453 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 13:06:49,822 (trainer:732) INFO: 17epoch:train:7401-7500batch: iter_time=1.317, forward_time=0.107, loss_ctc=69.012, loss_att=56.036, acc=0.683, loss=59.929, backward_time=0.766, grad_norm=92.081, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.910e-05, train_time=7.653
+[gpua003:0/64] 2023-07-06 13:08:33,082 (trainer:732) INFO: 17epoch:train:7501-7600batch: iter_time=1.091e-04, forward_time=0.109, loss_ctc=70.374, loss_att=56.800, acc=0.695, loss=60.872, backward_time=0.760, grad_norm=93.389, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.907e-05, train_time=2.065
+[gpua003:0/64] 2023-07-06 13:10:13,369 (trainer:732) INFO: 17epoch:train:7601-7700batch: iter_time=1.035e-04, forward_time=0.109, loss_ctc=71.632, loss_att=65.224, acc=0.684, loss=67.146,
backward_time=0.753, grad_norm=93.615, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.904e-05, train_time=2.006 +[gpua003:0/64] 2023-07-06 13:11:53,366 (trainer:732) INFO: 17epoch:train:7701-7800batch: iter_time=1.042e-04, forward_time=0.108, loss_ctc=71.948, loss_att=59.571, acc=0.687, loss=63.284, backward_time=0.753, grad_norm=89.530, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.902e-05, train_time=2.000 +[gpua003:0/64] 2023-07-06 13:13:33,022 (trainer:732) INFO: 17epoch:train:7801-7900batch: iter_time=1.008e-04, forward_time=0.106, loss_ctc=71.162, loss_att=53.846, acc=0.687, loss=59.041, backward_time=0.751, grad_norm=93.637, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.899e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 13:15:12,530 (trainer:732) INFO: 17epoch:train:7901-8000batch: iter_time=1.035e-04, forward_time=0.106, loss_ctc=73.768, loss_att=51.961, acc=0.697, loss=58.503, backward_time=0.750, grad_norm=93.453, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.896e-05, train_time=1.990 +[gpua003:0/64] 2023-07-06 13:16:52,321 (trainer:732) INFO: 17epoch:train:8001-8100batch: iter_time=1.051e-04, forward_time=0.107, loss_ctc=72.484, loss_att=57.703, acc=0.684, loss=62.137, backward_time=0.751, grad_norm=92.203, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.893e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 13:18:31,917 (trainer:732) INFO: 17epoch:train:8101-8200batch: iter_time=1.031e-04, forward_time=0.106, loss_ctc=69.942, loss_att=55.765, acc=0.690, loss=60.018, backward_time=0.751, grad_norm=85.911, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.890e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 13:20:16,333 (trainer:732) INFO: 17epoch:train:8201-8300batch: iter_time=1.102e-04, forward_time=0.113, loss_ctc=69.944, loss_att=53.888, acc=0.702, loss=58.705, backward_time=0.762, grad_norm=93.148, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.887e-05, train_time=2.088 +[gpua003:0/64] 2023-07-06 13:20:50,086 (multiple_iter_factory:32) INFO: Building 10th iter-factory... 
+[gpua003:0/64] 2023-07-06 13:21:09,066 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 13:21:12,570 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 13:21:12,570 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpua003:0/64] 2023-07-06 13:21:12,576 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 13:25:44,808 (trainer:732) INFO: 17epoch:train:8301-8400batch: iter_time=1.287, forward_time=0.108, loss_ctc=72.637, loss_att=55.648, acc=0.694, loss=60.745, backward_time=0.776, grad_norm=93.234, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.885e-05, train_time=6.569 +[gpua003:0/64] 2023-07-06 13:27:26,151 (trainer:732) INFO: 17epoch:train:8401-8500batch: iter_time=9.020e-05, forward_time=0.108, loss_ctc=71.200, loss_att=66.006, acc=0.691, loss=67.564, backward_time=0.752, grad_norm=84.887, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.882e-05, train_time=2.027 +[gpua003:0/64] 2023-07-06 13:29:06,443 (trainer:732) INFO: 17epoch:train:8501-8600batch: iter_time=9.487e-05, forward_time=0.108, loss_ctc=69.241, loss_att=57.750, acc=0.691, loss=61.197, backward_time=0.752, grad_norm=80.923, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.879e-05, train_time=2.006 +[gpua003:0/64] 2023-07-06 13:30:46,540 (trainer:732) INFO: 17epoch:train:8601-8700batch: iter_time=9.387e-05, forward_time=0.108, loss_ctc=71.052, loss_att=54.608, acc=0.696, loss=59.541, backward_time=0.752, grad_norm=81.533, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.876e-05, train_time=2.002 +[gpua003:0/64] 2023-07-06 13:32:26,237 (trainer:732) INFO: 17epoch:train:8701-8800batch: iter_time=8.946e-05, forward_time=0.107, loss_ctc=68.519, loss_att=53.218, acc=0.703, loss=57.809, backward_time=0.751, grad_norm=87.296, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.873e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 13:34:06,088 (trainer:732) INFO: 17epoch:train:8801-8900batch: iter_time=9.110e-05, forward_time=0.108, loss_ctc=75.321, loss_att=56.064, acc=0.703, loss=61.841, backward_time=0.752, grad_norm=83.253, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.871e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 13:35:45,773 (trainer:732) INFO: 17epoch:train:8901-9000batch: iter_time=8.413e-05, forward_time=0.107, loss_ctc=71.840, loss_att=52.405, acc=0.690, loss=58.236, backward_time=0.751, grad_norm=98.759, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.868e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 13:37:25,621 (trainer:732) INFO: 17epoch:train:9001-9100batch: iter_time=8.465e-05, forward_time=0.107, loss_ctc=67.828, loss_att=57.185, acc=0.695, loss=60.378, backward_time=0.753, grad_norm=87.054, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, 
optim0_lr0=8.865e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 13:38:32,627 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpua003:0/64] 2023-07-06 13:38:52,045 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 13:38:55,588 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 13:38:55,588 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpua003:0/64] 2023-07-06 13:38:55,594 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 13:43:25,159 (trainer:732) INFO: 17epoch:train:9101-9200batch: iter_time=1.294, forward_time=0.108, loss_ctc=73.928, loss_att=56.389, acc=0.694, loss=61.651, backward_time=0.764, grad_norm=128.846, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.113, optim0_lr0=8.862e-05, train_time=7.191 +[gpua003:0/64] 2023-07-06 13:45:08,132 (trainer:732) INFO: 17epoch:train:9201-9300batch: iter_time=9.071e-05, forward_time=0.106, loss_ctc=66.150, loss_att=58.093, acc=0.695, loss=60.510, backward_time=0.763, grad_norm=85.331, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.860e-05, train_time=2.059 +[gpua003:0/64] 2023-07-06 13:46:48,580 (trainer:732) INFO: 17epoch:train:9301-9400batch: iter_time=9.128e-05, forward_time=0.106, loss_ctc=69.745, loss_att=63.280, acc=0.688, loss=65.219, backward_time=0.751, grad_norm=101.929, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.857e-05, train_time=2.009 +[gpua003:0/64] 2023-07-06 13:48:37,657 (trainer:732) INFO: 17epoch:train:9401-9500batch: iter_time=8.943e-05, forward_time=0.106, loss_ctc=70.169, loss_att=55.042, acc=0.701, loss=59.580, backward_time=0.766, grad_norm=83.719, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.854e-05, train_time=2.181 +[gpua003:0/64] 2023-07-06 13:50:17,323 (trainer:732) INFO: 17epoch:train:9501-9600batch: iter_time=9.534e-05, forward_time=0.106, loss_ctc=68.339, loss_att=52.207, acc=0.704, loss=57.047, backward_time=0.751, grad_norm=77.700, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.851e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 13:51:57,215 (trainer:732) INFO: 17epoch:train:9601-9700batch: iter_time=9.239e-05, forward_time=0.106, loss_ctc=78.747, loss_att=60.575, acc=0.695, loss=66.026, backward_time=0.751, grad_norm=98.273, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.848e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 13:53:36,907 (trainer:732) INFO: 17epoch:train:9701-9800batch: iter_time=9.160e-05, forward_time=0.107, loss_ctc=71.624, loss_att=54.061, acc=0.696, loss=59.330, backward_time=0.750, grad_norm=94.224, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.846e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 13:55:16,555 (trainer:732) INFO: 17epoch:train:9801-9900batch: iter_time=8.975e-05, forward_time=0.106, loss_ctc=70.198, loss_att=52.936, 
acc=0.705, loss=58.115, backward_time=0.751, grad_norm=84.449, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.843e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 13:56:56,201 (trainer:732) INFO: 17epoch:train:9901-10000batch: iter_time=8.856e-05, forward_time=0.107, loss_ctc=71.418, loss_att=58.211, acc=0.687, loss=62.173, backward_time=0.750, grad_norm=99.355, clip=100.000, loss_scale=3.603e+16, optim_step_time=0.112, optim0_lr0=8.840e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 14:09:08,370 (trainer:338) INFO: 17epoch results: [train] iter_time=0.192, forward_time=0.112, loss_ctc=72.034, loss_att=57.326, acc=0.690, loss=61.738, backward_time=0.758, grad_norm=91.386, clip=100.000, loss_scale=2.342e+16, optim_step_time=0.113, optim0_lr0=8.981e-05, train_time=2.678, time=3 hours, 43 minutes and 23.37 seconds, total_count=140000, gpu_max_cached_mem_GB=37.779, [valid] loss_ctc=58.336, cer_ctc=0.306, loss_att=49.239, acc=0.657, cer=0.353, wer=0.988, loss=51.968, time=5 minutes and 55.68 seconds, total_count=14674, gpu_max_cached_mem_GB=37.779, [att_plot] time=6 minutes and 5.85 seconds, total_count=0, gpu_max_cached_mem_GB=37.779 +[gpua003:0/64] 2023-07-06 14:09:27,436 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpua003:0/64] 2023-07-06 14:09:27,626 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/12epoch.pth +[gpua003:0/64] 2023-07-06 14:09:27,714 (trainer:272) INFO: 18/100epoch started. Estimated time to finish: 1 week, 6 days and 8 hours +[gpua003:0/64] 2023-07-06 14:09:29,063 (multiple_iter_factory:32) INFO: Building 0th iter-factory... +[gpua003:0/64] 2023-07-06 14:09:48,025 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 14:09:53,240 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 14:09:53,240 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpua003:0/64] 2023-07-06 14:09:53,338 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 14:19:30,835 (trainer:732) INFO: 18epoch:train:1-100batch: iter_time=4.929, forward_time=0.153, loss_ctc=76.115, loss_att=63.275, acc=0.682, loss=67.127, backward_time=0.773, grad_norm=114.970, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.116, optim0_lr0=8.837e-05, train_time=12.048 +[gpua003:0/64] 2023-07-06 14:21:10,892 (trainer:732) INFO: 18epoch:train:101-200batch: iter_time=1.040e-04, forward_time=0.108, loss_ctc=78.407, loss_att=64.441, acc=0.685, loss=68.631, backward_time=0.752, grad_norm=128.032, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.835e-05, train_time=2.001 +[gpua003:0/64] 2023-07-06 14:22:53,009 (trainer:732) INFO: 18epoch:train:201-300batch: iter_time=9.913e-05, forward_time=0.108, loss_ctc=66.363, loss_att=49.349, acc=0.706, loss=54.453, backward_time=0.751, grad_norm=82.992, 
clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.832e-05, train_time=2.042 +[gpua003:0/64] 2023-07-06 14:24:32,622 (trainer:732) INFO: 18epoch:train:301-400batch: iter_time=8.736e-05, forward_time=0.107, loss_ctc=85.395, loss_att=58.982, acc=0.692, loss=66.906, backward_time=0.752, grad_norm=105.306, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.829e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 14:26:14,069 (trainer:732) INFO: 18epoch:train:401-500batch: iter_time=8.796e-05, forward_time=0.107, loss_ctc=74.067, loss_att=58.678, acc=0.674, loss=63.295, backward_time=0.752, grad_norm=98.618, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.826e-05, train_time=2.029 +[gpua003:0/64] 2023-07-06 14:27:54,124 (trainer:732) INFO: 18epoch:train:501-600batch: iter_time=9.281e-05, forward_time=0.107, loss_ctc=85.710, loss_att=69.303, acc=0.665, loss=74.225, backward_time=0.753, grad_norm=104.959, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.824e-05, train_time=2.001 +[gpua003:0/64] 2023-07-06 14:29:33,936 (trainer:732) INFO: 18epoch:train:601-700batch: iter_time=8.948e-05, forward_time=0.107, loss_ctc=68.980, loss_att=52.042, acc=0.680, loss=57.123, backward_time=0.752, grad_norm=124.515, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.821e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 14:31:21,706 (trainer:732) INFO: 18epoch:train:701-800batch: iter_time=3.960e-04, forward_time=0.142, loss_ctc=86.717, loss_att=65.821, acc=0.661, loss=72.090, backward_time=0.767, grad_norm=105.973, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.818e-05, train_time=2.152 +[gpua003:0/64] 2023-07-06 14:32:09,857 (multiple_iter_factory:32) INFO: Building 1th iter-factory... 
+[gpua003:0/64] 2023-07-06 14:32:28,641 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 14:32:32,360 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 14:32:32,360 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpua003:0/64] 2023-07-06 14:32:32,366 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 14:36:07,379 (trainer:732) INFO: 18epoch:train:801-900batch: iter_time=1.622, forward_time=0.152, loss_ctc=74.970, loss_att=58.171, acc=0.683, loss=63.211, backward_time=0.778, grad_norm=89.744, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.115, optim0_lr0=8.815e-05, train_time=5.717 +[gpua003:0/64] 2023-07-06 14:37:47,747 (trainer:732) INFO: 18epoch:train:901-1000batch: iter_time=9.918e-05, forward_time=0.108, loss_ctc=75.573, loss_att=63.432, acc=0.686, loss=67.074, backward_time=0.752, grad_norm=104.391, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.813e-05, train_time=2.007 +[gpua003:0/64] 2023-07-06 14:39:27,570 (trainer:732) INFO: 18epoch:train:1001-1100batch: iter_time=9.562e-05, forward_time=0.108, loss_ctc=71.935, loss_att=56.850, acc=0.696, loss=61.375, backward_time=0.751, grad_norm=93.037, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.810e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 14:41:07,498 (trainer:732) INFO: 18epoch:train:1101-1200batch: iter_time=1.016e-04, forward_time=0.109, loss_ctc=73.347, loss_att=52.973, acc=0.703, loss=59.086, backward_time=0.751, grad_norm=99.082, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.807e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 14:42:47,249 (trainer:732) INFO: 18epoch:train:1201-1300batch: iter_time=9.793e-05, forward_time=0.108, loss_ctc=83.048, loss_att=61.707, acc=0.686, loss=68.110, backward_time=0.750, grad_norm=102.821, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.804e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 14:44:27,022 (trainer:732) INFO: 18epoch:train:1301-1400batch: iter_time=1.028e-04, forward_time=0.108, loss_ctc=78.082, loss_att=59.145, acc=0.672, loss=64.826, backward_time=0.751, grad_norm=120.118, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.802e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 14:46:06,792 (trainer:732) INFO: 18epoch:train:1401-1500batch: iter_time=1.063e-04, forward_time=0.109, loss_ctc=77.767, loss_att=61.606, acc=0.674, loss=66.454, backward_time=0.752, grad_norm=122.959, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.799e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 14:47:46,697 (trainer:732) INFO: 18epoch:train:1501-1600batch: iter_time=9.485e-05, forward_time=0.110, loss_ctc=73.758, loss_att=56.709, acc=0.684, loss=61.824, backward_time=0.753, grad_norm=95.013, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.796e-05, 
train_time=1.998 +[gpua003:0/64] 2023-07-06 14:48:53,758 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpua003:0/64] 2023-07-06 14:49:13,084 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 14:49:16,877 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 14:49:16,877 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpua003:0/64] 2023-07-06 14:49:16,883 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 14:53:39,624 (trainer:732) INFO: 18epoch:train:1601-1700batch: iter_time=1.292, forward_time=0.109, loss_ctc=77.398, loss_att=58.944, acc=0.666, loss=64.480, backward_time=0.761, grad_norm=104.361, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.794e-05, train_time=7.058 +[gpua003:0/64] 2023-07-06 14:55:19,937 (trainer:732) INFO: 18epoch:train:1701-1800batch: iter_time=1.040e-04, forward_time=0.108, loss_ctc=72.292, loss_att=58.658, acc=0.692, loss=62.748, backward_time=0.754, grad_norm=90.979, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.791e-05, train_time=2.006 +[gpua003:0/64] 2023-07-06 14:57:00,068 (trainer:732) INFO: 18epoch:train:1801-1900batch: iter_time=9.482e-05, forward_time=0.108, loss_ctc=75.435, loss_att=59.110, acc=0.697, loss=64.008, backward_time=0.753, grad_norm=94.982, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.788e-05, train_time=2.002 +[gpua003:0/64] 2023-07-06 14:58:41,886 (trainer:732) INFO: 18epoch:train:1901-2000batch: iter_time=8.814e-05, forward_time=0.108, loss_ctc=66.463, loss_att=50.107, acc=0.714, loss=55.014, backward_time=0.753, grad_norm=94.405, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.785e-05, train_time=2.036 +[gpua003:0/64] 2023-07-06 15:00:21,511 (trainer:732) INFO: 18epoch:train:2001-2100batch: iter_time=8.940e-05, forward_time=0.108, loss_ctc=87.948, loss_att=64.112, acc=0.676, loss=71.263, backward_time=0.751, grad_norm=117.699, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.783e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 15:02:01,222 (trainer:732) INFO: 18epoch:train:2101-2200batch: iter_time=8.983e-05, forward_time=0.108, loss_ctc=78.447, loss_att=60.484, acc=0.681, loss=65.873, backward_time=0.752, grad_norm=100.916, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.780e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 15:03:48,764 (trainer:732) INFO: 18epoch:train:2201-2300batch: iter_time=9.119e-05, forward_time=0.107, loss_ctc=79.251, loss_att=62.785, acc=0.668, loss=67.725, backward_time=0.760, grad_norm=93.330, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.114, optim0_lr0=8.777e-05, train_time=2.151 +[gpua003:0/64] 2023-07-06 15:05:58,569 (trainer:732) INFO: 18epoch:train:2301-2400batch: iter_time=8.569e-05, forward_time=0.108, loss_ctc=66.068, loss_att=48.122, acc=0.690, loss=53.506, 
backward_time=0.810, grad_norm=98.868, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.775e-05, train_time=2.596 +[gpua003:0/64] 2023-07-06 15:08:21,871 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpua003:0/64] 2023-07-06 15:08:41,122 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 15:08:44,919 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 15:08:44,919 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpua003:0/64] 2023-07-06 15:08:44,925 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 15:11:51,642 (trainer:732) INFO: 18epoch:train:2401-2500batch: iter_time=1.318, forward_time=0.127, loss_ctc=84.958, loss_att=62.041, acc=0.667, loss=68.916, backward_time=0.850, grad_norm=104.647, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.114, optim0_lr0=8.772e-05, train_time=7.061 +[gpua003:0/64] 2023-07-06 15:13:33,609 (trainer:732) INFO: 18epoch:train:2501-2600batch: iter_time=1.008e-04, forward_time=0.108, loss_ctc=75.155, loss_att=62.287, acc=0.691, loss=66.148, backward_time=0.764, grad_norm=96.943, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.769e-05, train_time=2.039 +[gpua003:0/64] 2023-07-06 15:15:13,429 (trainer:732) INFO: 18epoch:train:2601-2700batch: iter_time=1.057e-04, forward_time=0.108, loss_ctc=76.932, loss_att=63.507, acc=0.694, loss=67.535, backward_time=0.750, grad_norm=96.917, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.766e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 15:16:53,331 (trainer:732) INFO: 18epoch:train:2701-2800batch: iter_time=1.094e-04, forward_time=0.108, loss_ctc=62.411, loss_att=46.895, acc=0.715, loss=51.550, backward_time=0.752, grad_norm=92.496, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.764e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 15:18:33,060 (trainer:732) INFO: 18epoch:train:2801-2900batch: iter_time=9.940e-05, forward_time=0.108, loss_ctc=81.354, loss_att=57.151, acc=0.696, loss=64.412, backward_time=0.751, grad_norm=109.033, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.761e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 15:20:12,823 (trainer:732) INFO: 18epoch:train:2901-3000batch: iter_time=1.129e-04, forward_time=0.108, loss_ctc=73.741, loss_att=58.586, acc=0.679, loss=63.132, backward_time=0.752, grad_norm=91.478, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.758e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 15:21:52,679 (trainer:732) INFO: 18epoch:train:3001-3100batch: iter_time=1.015e-04, forward_time=0.109, loss_ctc=83.727, loss_att=65.874, acc=0.669, loss=71.230, backward_time=0.752, grad_norm=106.440, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.756e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 15:23:32,451 (trainer:732) INFO: 
18epoch:train:3101-3200batch: iter_time=1.030e-04, forward_time=0.109, loss_ctc=67.147, loss_att=49.955, acc=0.689, loss=55.113, backward_time=0.751, grad_norm=107.665, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.753e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 15:25:14,937 (trainer:732) INFO: 18epoch:train:3201-3300batch: iter_time=9.253e-05, forward_time=0.108, loss_ctc=81.896, loss_att=61.574, acc=0.673, loss=67.671, backward_time=0.753, grad_norm=105.660, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.750e-05, train_time=2.049 +[gpua003:0/64] 2023-07-06 15:25:48,262 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpua003:0/64] 2023-07-06 15:26:07,548 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 15:26:11,328 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 15:26:11,328 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpua003:0/64] 2023-07-06 15:26:11,334 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 15:29:42,037 (trainer:732) INFO: 18epoch:train:3301-3400batch: iter_time=1.323, forward_time=0.110, loss_ctc=72.978, loss_att=56.716, acc=0.686, loss=61.595, backward_time=0.775, grad_norm=91.640, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.114, optim0_lr0=8.748e-05, train_time=5.342 +[gpua003:0/64] 2023-07-06 15:31:22,262 (trainer:732) INFO: 18epoch:train:3401-3500batch: iter_time=1.067e-04, forward_time=0.108, loss_ctc=72.957, loss_att=62.528, acc=0.685, loss=65.657, backward_time=0.753, grad_norm=88.960, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.745e-05, train_time=2.004 +[gpua003:0/64] 2023-07-06 15:33:02,184 (trainer:732) INFO: 18epoch:train:3501-3600batch: iter_time=1.066e-04, forward_time=0.109, loss_ctc=67.727, loss_att=54.922, acc=0.697, loss=58.763, backward_time=0.753, grad_norm=96.147, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.742e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 15:34:41,830 (trainer:732) INFO: 18epoch:train:3601-3700batch: iter_time=1.085e-04, forward_time=0.108, loss_ctc=72.865, loss_att=53.783, acc=0.699, loss=59.507, backward_time=0.752, grad_norm=105.810, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.740e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 15:36:21,533 (trainer:732) INFO: 18epoch:train:3701-3800batch: iter_time=1.178e-04, forward_time=0.109, loss_ctc=78.415, loss_att=59.688, acc=0.681, loss=65.306, backward_time=0.752, grad_norm=94.163, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.737e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 15:38:01,291 (trainer:732) INFO: 18epoch:train:3801-3900batch: iter_time=1.163e-04, forward_time=0.109, loss_ctc=74.930, loss_att=57.824, acc=0.677, loss=62.956, backward_time=0.752, grad_norm=98.833, clip=100.000, loss_scale=7.206e+16, 
optim_step_time=0.114, optim0_lr0=8.734e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 15:39:41,058 (trainer:732) INFO: 18epoch:train:3901-4000batch: iter_time=1.116e-04, forward_time=0.109, loss_ctc=73.796, loss_att=59.813, acc=0.674, loss=64.008, backward_time=0.753, grad_norm=105.954, clip=100.000, loss_scale=7.206e+16, optim_step_time=0.113, optim0_lr0=8.732e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 15:41:20,935 (trainer:732) INFO: 18epoch:train:4001-4100batch: iter_time=9.829e-05, forward_time=0.109, loss_ctc=71.016, loss_att=54.671, acc=0.686, loss=59.574, backward_time=0.753, grad_norm=88.325, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.729e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 15:42:27,084 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpua003:0/64] 2023-07-06 15:42:46,118 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 15:42:49,698 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 15:42:49,698 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpua003:0/64] 2023-07-06 15:42:49,704 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 15:46:38,959 (trainer:732) INFO: 18epoch:train:4101-4200batch: iter_time=1.311, forward_time=0.109, loss_ctc=82.633, loss_att=57.106, acc=0.674, loss=64.764, backward_time=0.765, grad_norm=108.721, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.726e-05, train_time=6.360 +[gpua003:0/64] 2023-07-06 15:48:19,686 (trainer:732) INFO: 18epoch:train:4201-4300batch: iter_time=9.726e-05, forward_time=0.109, loss_ctc=74.521, loss_att=62.322, acc=0.689, loss=65.982, backward_time=0.757, grad_norm=96.759, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.724e-05, train_time=2.014 +[gpua003:0/64] 2023-07-06 15:49:59,357 (trainer:732) INFO: 18epoch:train:4301-4400batch: iter_time=1.081e-04, forward_time=0.108, loss_ctc=68.417, loss_att=54.908, acc=0.700, loss=58.961, backward_time=0.751, grad_norm=87.929, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.721e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 15:51:39,010 (trainer:732) INFO: 18epoch:train:4401-4500batch: iter_time=1.019e-04, forward_time=0.108, loss_ctc=72.963, loss_att=54.105, acc=0.704, loss=59.763, backward_time=0.750, grad_norm=121.911, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.718e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 15:53:18,767 (trainer:732) INFO: 18epoch:train:4501-4600batch: iter_time=1.001e-04, forward_time=0.108, loss_ctc=78.466, loss_att=59.773, acc=0.690, loss=65.381, backward_time=0.751, grad_norm=96.401, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.716e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 15:54:58,498 (trainer:732) INFO: 18epoch:train:4601-4700batch: iter_time=1.004e-04, forward_time=0.108, 
loss_ctc=76.432, loss_att=58.796, acc=0.677, loss=64.087, backward_time=0.752, grad_norm=97.092, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.713e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 15:56:38,047 (trainer:732) INFO: 18epoch:train:4701-4800batch: iter_time=9.499e-05, forward_time=0.108, loss_ctc=79.331, loss_att=63.852, acc=0.665, loss=68.495, backward_time=0.750, grad_norm=256.176, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.710e-05, train_time=1.991 +[gpua003:0/64] 2023-07-06 15:58:19,964 (trainer:732) INFO: 18epoch:train:4801-4900batch: iter_time=9.716e-05, forward_time=0.108, loss_ctc=68.325, loss_att=52.879, acc=0.682, loss=57.513, backward_time=0.751, grad_norm=118.723, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.708e-05, train_time=2.038 +[gpua003:0/64] 2023-07-06 16:00:00,347 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpua003:0/64] 2023-07-06 16:00:19,563 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 16:00:23,071 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 16:00:23,071 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpua003:0/64] 2023-07-06 16:00:23,077 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 16:03:54,812 (trainer:732) INFO: 18epoch:train:4901-5000batch: iter_time=1.281, forward_time=0.108, loss_ctc=83.769, loss_att=60.724, acc=0.667, loss=67.638, backward_time=0.761, grad_norm=148.559, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.112, optim0_lr0=8.705e-05, train_time=6.697 +[gpua003:0/64] 2023-07-06 16:05:37,501 (trainer:732) INFO: 18epoch:train:5001-5100batch: iter_time=9.821e-05, forward_time=0.108, loss_ctc=74.654, loss_att=62.672, acc=0.686, loss=66.267, backward_time=0.760, grad_norm=88.103, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.702e-05, train_time=2.054 +[gpua003:0/64] 2023-07-06 16:07:17,638 (trainer:732) INFO: 18epoch:train:5101-5200batch: iter_time=1.032e-04, forward_time=0.108, loss_ctc=74.717, loss_att=61.342, acc=0.690, loss=65.354, backward_time=0.751, grad_norm=111.773, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.112, optim0_lr0=8.700e-05, train_time=2.003 +[gpua003:0/64] 2023-07-06 16:08:57,432 (trainer:732) INFO: 18epoch:train:5201-5300batch: iter_time=1.039e-04, forward_time=0.109, loss_ctc=62.277, loss_att=47.038, acc=0.711, loss=51.609, backward_time=0.752, grad_norm=77.470, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.697e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 16:10:37,101 (trainer:732) INFO: 18epoch:train:5301-5400batch: iter_time=1.087e-04, forward_time=0.108, loss_ctc=81.357, loss_att=56.958, acc=0.687, loss=64.278, backward_time=0.751, grad_norm=148.292, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.695e-05, train_time=1.993 
+[gpua003:0/64] 2023-07-06 16:12:16,859 (trainer:732) INFO: 18epoch:train:5401-5500batch: iter_time=1.099e-04, forward_time=0.109, loss_ctc=73.380, loss_att=57.785, acc=0.682, loss=62.463, backward_time=0.751, grad_norm=91.302, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.692e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 16:13:56,539 (trainer:732) INFO: 18epoch:train:5501-5600batch: iter_time=1.039e-04, forward_time=0.108, loss_ctc=81.378, loss_att=65.579, acc=0.665, loss=70.319, backward_time=0.751, grad_norm=99.738, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.689e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 16:15:35,974 (trainer:732) INFO: 18epoch:train:5601-5700batch: iter_time=1.127e-04, forward_time=0.108, loss_ctc=65.790, loss_att=48.674, acc=0.695, loss=53.808, backward_time=0.749, grad_norm=111.144, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.687e-05, train_time=1.988 +[gpua003:0/64] 2023-07-06 16:17:15,561 (trainer:732) INFO: 18epoch:train:5701-5800batch: iter_time=1.069e-04, forward_time=0.107, loss_ctc=82.875, loss_att=60.270, acc=0.669, loss=67.051, backward_time=0.750, grad_norm=114.126, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.684e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 16:17:48,761 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpua003:0/64] 2023-07-06 16:18:08,112 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 16:18:11,617 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 16:18:11,617 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpua003:0/64] 2023-07-06 16:18:11,623 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 16:22:38,235 (trainer:732) INFO: 18epoch:train:5801-5900batch: iter_time=1.313, forward_time=0.109, loss_ctc=73.660, loss_att=59.347, acc=0.685, loss=63.641, backward_time=0.764, grad_norm=132.057, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.681e-05, train_time=6.453 +[gpua003:0/64] 2023-07-06 16:24:18,781 (trainer:732) INFO: 18epoch:train:5901-6000batch: iter_time=1.002e-04, forward_time=0.109, loss_ctc=76.119, loss_att=58.539, acc=0.690, loss=63.813, backward_time=0.754, grad_norm=88.467, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.679e-05, train_time=2.011 +[gpua003:0/64] 2023-07-06 16:26:06,138 (trainer:732) INFO: 18epoch:train:6001-6100batch: iter_time=9.305e-05, forward_time=0.109, loss_ctc=67.971, loss_att=52.451, acc=0.704, loss=57.107, backward_time=0.766, grad_norm=95.395, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.676e-05, train_time=2.147 +[gpua003:0/64] 2023-07-06 16:27:49,192 (trainer:732) INFO: 18epoch:train:6101-6200batch: iter_time=9.965e-05, forward_time=0.108, loss_ctc=77.390, loss_att=55.435, acc=0.689, loss=62.021, 
backward_time=0.756, grad_norm=106.144, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.674e-05, train_time=2.061 +[gpua003:0/64] 2023-07-06 16:29:29,408 (trainer:732) INFO: 18epoch:train:6201-6300batch: iter_time=9.552e-05, forward_time=0.109, loss_ctc=70.826, loss_att=56.281, acc=0.684, loss=60.644, backward_time=0.753, grad_norm=94.037, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.671e-05, train_time=2.004 +[gpua003:0/64] 2023-07-06 16:31:09,234 (trainer:732) INFO: 18epoch:train:6301-6400batch: iter_time=9.346e-05, forward_time=0.107, loss_ctc=78.001, loss_att=63.797, acc=0.672, loss=68.058, backward_time=0.751, grad_norm=98.998, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.668e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 16:32:48,924 (trainer:732) INFO: 18epoch:train:6401-6500batch: iter_time=9.280e-05, forward_time=0.108, loss_ctc=72.916, loss_att=53.940, acc=0.675, loss=59.633, backward_time=0.751, grad_norm=86.057, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.666e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 16:34:29,082 (trainer:732) INFO: 18epoch:train:6501-6600batch: iter_time=1.069e-04, forward_time=0.107, loss_ctc=74.326, loss_att=55.265, acc=0.677, loss=60.983, backward_time=0.752, grad_norm=94.716, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.663e-05, train_time=2.003 +[gpua003:0/64] 2023-07-06 16:35:38,397 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpua003:0/64] 2023-07-06 16:35:57,667 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 16:36:01,099 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 16:36:01,100 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpua003:0/64] 2023-07-06 16:36:01,106 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 16:41:32,144 (trainer:732) INFO: 18epoch:train:6601-6700batch: iter_time=1.282, forward_time=0.107, loss_ctc=83.492, loss_att=58.963, acc=0.681, loss=66.321, backward_time=0.770, grad_norm=107.633, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.661e-05, train_time=8.461 +[gpua003:0/64] 2023-07-06 16:43:17,374 (trainer:732) INFO: 18epoch:train:6701-6800batch: iter_time=9.000e-05, forward_time=0.107, loss_ctc=72.670, loss_att=61.072, acc=0.690, loss=64.551, backward_time=0.767, grad_norm=93.284, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.658e-05, train_time=2.104 +[gpua003:0/64] 2023-07-06 16:44:57,475 (trainer:732) INFO: 18epoch:train:6801-6900batch: iter_time=8.929e-05, forward_time=0.107, loss_ctc=68.519, loss_att=54.104, acc=0.697, loss=58.428, backward_time=0.751, grad_norm=89.012, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.655e-05, train_time=2.002 +[gpua003:0/64] 2023-07-06 16:46:44,014 (trainer:732) INFO: 
18epoch:train:6901-7000batch: iter_time=8.633e-05, forward_time=0.107, loss_ctc=73.189, loss_att=52.360, acc=0.706, loss=58.609, backward_time=0.755, grad_norm=131.163, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.112, optim0_lr0=8.653e-05, train_time=2.131 +[gpua003:0/64] 2023-07-06 16:48:38,048 (trainer:732) INFO: 18epoch:train:7001-7100batch: iter_time=9.301e-05, forward_time=0.107, loss_ctc=76.911, loss_att=60.205, acc=0.681, loss=65.217, backward_time=0.775, grad_norm=91.195, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.650e-05, train_time=2.280 +[gpua003:0/64] 2023-07-06 16:50:17,837 (trainer:732) INFO: 18epoch:train:7101-7200batch: iter_time=9.519e-05, forward_time=0.107, loss_ctc=73.833, loss_att=56.759, acc=0.685, loss=61.881, backward_time=0.750, grad_norm=97.936, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.648e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 16:51:58,408 (trainer:732) INFO: 18epoch:train:7201-7300batch: iter_time=9.376e-05, forward_time=0.107, loss_ctc=76.637, loss_att=62.319, acc=0.667, loss=66.614, backward_time=0.752, grad_norm=113.058, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.645e-05, train_time=2.011 +[gpua003:0/64] 2023-07-06 16:54:05,563 (trainer:732) INFO: 18epoch:train:7301-7400batch: iter_time=1.831e-04, forward_time=0.124, loss_ctc=68.742, loss_att=52.246, acc=0.684, loss=57.195, backward_time=0.816, grad_norm=109.431, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.119, optim0_lr0=8.642e-05, train_time=2.543 +[gpua003:0/64] 2023-07-06 16:55:47,933 (trainer:732) INFO: 18epoch:train:7401-7500batch: iter_time=9.365e-05, forward_time=0.110, loss_ctc=82.184, loss_att=57.189, acc=0.675, loss=64.687, backward_time=0.760, grad_norm=142.708, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.640e-05, train_time=2.047 +[gpua003:0/64] 2023-07-06 16:55:54,106 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpua003:0/64] 2023-07-06 16:56:13,402 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 16:56:16,909 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 16:56:16,910 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpua003:0/64] 2023-07-06 16:56:16,916 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 17:00:34,079 (trainer:732) INFO: 18epoch:train:7501-7600batch: iter_time=1.505, forward_time=0.108, loss_ctc=74.300, loss_att=63.152, acc=0.683, loss=66.497, backward_time=0.765, grad_norm=141.163, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.637e-05, train_time=5.723 +[gpua003:0/64] 2023-07-06 17:02:15,733 (trainer:732) INFO: 18epoch:train:7601-7700batch: iter_time=9.276e-05, forward_time=0.109, loss_ctc=75.341, loss_att=59.650, acc=0.691, loss=64.357, backward_time=0.757, grad_norm=131.218, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.635e-05, train_time=2.033 +[gpua003:0/64] 2023-07-06 17:04:00,532 (trainer:732) INFO: 18epoch:train:7701-7800batch: iter_time=9.513e-05, forward_time=0.108, loss_ctc=62.710, loss_att=47.592, acc=0.714, loss=52.128, backward_time=0.758, grad_norm=100.496, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.632e-05, train_time=2.096 +[gpua003:0/64] 2023-07-06 17:05:41,261 (trainer:732) INFO: 18epoch:train:7801-7900batch: iter_time=1.024e-04, forward_time=0.109, loss_ctc=80.778, loss_att=56.500, acc=0.691, loss=63.783, backward_time=0.753, grad_norm=159.514, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.630e-05, train_time=2.014 +[gpua003:0/64] 2023-07-06 17:07:23,370 (trainer:732) INFO: 18epoch:train:7901-8000batch: iter_time=9.871e-05, forward_time=0.109, loss_ctc=72.426, loss_att=57.186, acc=0.677, loss=61.758, backward_time=0.755, grad_norm=100.381, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.627e-05, train_time=2.042 +[gpua003:0/64] 2023-07-06 17:09:03,079 (trainer:732) INFO: 18epoch:train:8001-8100batch: iter_time=9.808e-05, forward_time=0.108, loss_ctc=81.683, loss_att=65.407, acc=0.673, loss=70.289, backward_time=0.751, grad_norm=103.619, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.624e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 17:10:54,113 (trainer:732) INFO: 18epoch:train:8101-8200batch: iter_time=1.053e-04, forward_time=0.108, loss_ctc=64.899, loss_att=47.680, acc=0.696, loss=52.846, backward_time=0.765, grad_norm=84.417, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.622e-05, train_time=2.220 +[gpua003:0/64] 2023-07-06 17:12:33,937 (trainer:732) INFO: 18epoch:train:8201-8300batch: iter_time=9.519e-05, forward_time=0.107, loss_ctc=80.088, loss_att=59.873, acc=0.669, loss=65.937, backward_time=0.751, grad_norm=140.365, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.112, 
optim0_lr0=8.619e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 17:13:08,280 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpua003:0/64] 2023-07-06 17:13:27,649 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 17:13:31,147 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 17:13:31,147 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpua003:0/64] 2023-07-06 17:13:31,153 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 17:18:59,187 (trainer:732) INFO: 18epoch:train:8301-8400batch: iter_time=1.294, forward_time=0.107, loss_ctc=72.616, loss_att=56.151, acc=0.688, loss=61.090, backward_time=0.763, grad_norm=159.886, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.112, optim0_lr0=8.617e-05, train_time=7.705 +[gpua003:0/64] 2023-07-06 17:20:40,408 (trainer:732) INFO: 18epoch:train:8401-8500batch: iter_time=1.002e-04, forward_time=0.108, loss_ctc=72.837, loss_att=60.069, acc=0.694, loss=63.899, backward_time=0.752, grad_norm=118.934, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.112, optim0_lr0=8.614e-05, train_time=2.024 +[gpua003:0/64] 2023-07-06 17:22:20,274 (trainer:732) INFO: 18epoch:train:8501-8600batch: iter_time=9.768e-05, forward_time=0.108, loss_ctc=68.421, loss_att=53.892, acc=0.698, loss=58.250, backward_time=0.752, grad_norm=96.100, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.612e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 17:24:14,320 (trainer:732) INFO: 18epoch:train:8601-8700batch: iter_time=1.036e-04, forward_time=0.109, loss_ctc=68.857, loss_att=51.084, acc=0.707, loss=56.416, backward_time=0.768, grad_norm=78.940, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.609e-05, train_time=2.281 +[gpua003:0/64] 2023-07-06 17:25:54,219 (trainer:732) INFO: 18epoch:train:8701-8800batch: iter_time=1.021e-04, forward_time=0.108, loss_ctc=79.583, loss_att=60.458, acc=0.683, loss=66.195, backward_time=0.752, grad_norm=99.339, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.607e-05, train_time=1.998 +[gpua003:0/64] 2023-07-06 17:27:34,084 (trainer:732) INFO: 18epoch:train:8801-8900batch: iter_time=9.920e-05, forward_time=0.108, loss_ctc=74.773, loss_att=57.128, acc=0.683, loss=62.421, backward_time=0.753, grad_norm=91.643, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.604e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 17:29:13,700 (trainer:732) INFO: 18epoch:train:8901-9000batch: iter_time=1.083e-04, forward_time=0.107, loss_ctc=74.198, loss_att=58.596, acc=0.679, loss=63.277, backward_time=0.750, grad_norm=131.138, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.112, optim0_lr0=8.601e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 17:30:53,462 (trainer:732) INFO: 18epoch:train:9001-9100batch: iter_time=1.043e-04, forward_time=0.107, loss_ctc=72.289, loss_att=54.298, 
acc=0.688, loss=59.695, backward_time=0.750, grad_norm=98.942, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.112, optim0_lr0=8.599e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 17:32:00,571 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpua003:0/64] 2023-07-06 17:32:19,636 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 17:32:23,155 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 17:32:23,155 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpua003:0/64] 2023-07-06 17:32:23,161 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 17:36:57,445 (trainer:732) INFO: 18epoch:train:9101-9200batch: iter_time=1.310, forward_time=0.109, loss_ctc=81.340, loss_att=56.298, acc=0.680, loss=63.811, backward_time=0.761, grad_norm=111.662, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.596e-05, train_time=7.279 +[gpua003:0/64] 2023-07-06 17:38:39,715 (trainer:732) INFO: 18epoch:train:9201-9300batch: iter_time=1.090e-04, forward_time=0.108, loss_ctc=73.285, loss_att=63.161, acc=0.686, loss=66.198, backward_time=0.757, grad_norm=88.279, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.594e-05, train_time=2.045 +[gpua003:0/64] 2023-07-06 17:40:23,113 (trainer:732) INFO: 18epoch:train:9301-9400batch: iter_time=1.137e-04, forward_time=0.109, loss_ctc=68.800, loss_att=55.601, acc=0.706, loss=59.561, backward_time=0.756, grad_norm=92.752, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.591e-05, train_time=2.068 +[gpua003:0/64] 2023-07-06 17:42:05,313 (trainer:732) INFO: 18epoch:train:9401-9500batch: iter_time=1.016e-04, forward_time=0.108, loss_ctc=72.284, loss_att=52.694, acc=0.710, loss=58.571, backward_time=0.754, grad_norm=98.040, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.589e-05, train_time=2.044 +[gpua003:0/64] 2023-07-06 17:43:45,031 (trainer:732) INFO: 18epoch:train:9501-9600batch: iter_time=1.102e-04, forward_time=0.109, loss_ctc=76.407, loss_att=59.152, acc=0.693, loss=64.328, backward_time=0.751, grad_norm=111.122, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.586e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 17:45:24,576 (trainer:732) INFO: 18epoch:train:9601-9700batch: iter_time=1.125e-04, forward_time=0.107, loss_ctc=74.718, loss_att=58.743, acc=0.682, loss=63.535, backward_time=0.750, grad_norm=102.896, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.584e-05, train_time=1.991 +[gpua003:0/64] 2023-07-06 17:47:08,153 (trainer:732) INFO: 18epoch:train:9701-9800batch: iter_time=1.040e-04, forward_time=0.107, loss_ctc=76.130, loss_att=62.394, acc=0.672, loss=66.515, backward_time=0.752, grad_norm=99.445, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.581e-05, train_time=2.071 +[gpua003:0/64] 2023-07-06 17:48:48,960 
(trainer:732) INFO: 18epoch:train:9801-9900batch: iter_time=9.027e-05, forward_time=0.108, loss_ctc=67.693, loss_att=51.247, acc=0.689, loss=56.181, backward_time=0.758, grad_norm=101.173, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.579e-05, train_time=2.016 +[gpua003:0/64] 2023-07-06 17:50:28,839 (trainer:732) INFO: 18epoch:train:9901-10000batch: iter_time=8.743e-05, forward_time=0.109, loss_ctc=81.308, loss_att=58.352, acc=0.679, loss=65.239, backward_time=0.752, grad_norm=97.815, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.576e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 18:02:52,400 (trainer:338) INFO: 18epoch results: [train] iter_time=0.198, forward_time=0.110, loss_ctc=75.020, loss_att=57.840, acc=0.686, loss=62.994, backward_time=0.757, grad_norm=106.540, clip=100.000, loss_scale=1.441e+17, optim_step_time=0.113, optim0_lr0=8.705e-05, train_time=2.652, time=3 hours, 41 minutes and 15.98 seconds, total_count=150000, gpu_max_cached_mem_GB=37.779, [valid] loss_ctc=50.528, cer_ctc=0.288, loss_att=42.295, acc=0.657, cer=0.376, wer=0.987, loss=44.765, time=5 minutes and 47.09 seconds, total_count=15686, gpu_max_cached_mem_GB=37.779, [att_plot] time=6 minutes and 21.52 seconds, total_count=0, gpu_max_cached_mem_GB=37.779 +[gpua003:0/64] 2023-07-06 18:03:10,995 (trainer:386) INFO: The best model has been updated: valid.total_count +[gpua003:0/64] 2023-07-06 18:03:11,099 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/13epoch.pth +[gpua003:0/64] 2023-07-06 18:03:11,142 (trainer:272) INFO: 19/100epoch started. Estimated time to finish: 1 week, 6 days and 5 hours +[gpua003:0/64] 2023-07-06 18:03:12,483 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
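A note for readers skimming the trainer:732 records above: each one reports the CTC branch loss (loss_ctc), the attention-decoder loss (loss_att), and a combined loss, and the logged numbers are consistent with the usual hybrid CTC/attention interpolation loss = w * loss_ctc + (1 - w) * loss_att with w ~ 0.3 (e.g. 0.3 * 75.341 + 0.7 * 59.650 = 64.357). The weight itself is never printed in this log, so treat 0.3 as an inference (presumably a ctc_weight-style option in the training config). A minimal sketch that re-derives it from one record:

```python
# Sketch: recover the CTC/attention interpolation weight from a logged record.
# Assumes loss = w * loss_ctc + (1 - w) * loss_att, the standard hybrid objective.

def implied_ctc_weight(loss_ctc: float, loss_att: float, loss: float) -> float:
    """Solve loss = w * loss_ctc + (1 - w) * loss_att for w."""
    return (loss - loss_att) / (loss_ctc - loss_att)

# Values copied from the 18epoch:train:7601-7700batch record above.
w = implied_ctc_weight(loss_ctc=75.341, loss_att=59.650, loss=64.357)
assert abs(w - 0.3) < 1e-3
print(f"implied ctc weight ~ {w:.3f}")  # -> 0.300
```

The epoch bookkeeping above is also easy to sanity-check: epoch 18 took about 3 h 41 min of training plus roughly 12 min of validation and attention plotting, and 82 remaining epochs at ~3.9 h each is ~13.2 days, matching the logged "1 week, 6 days and 5 hours". The "best model has been updated: valid.total_count" / "model files were removed: ...13epoch.pth" pair is the trainer's keep-n-best bookkeeping: checkpoints that no longer qualify under any retention criterion are pruned.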
+[gpua003:0/64] 2023-07-06 18:03:31,414 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 18:03:36,058 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 18:03:36,058 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpua003:0/64] 2023-07-06 18:03:36,157 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 18:10:33,322 (trainer:732) INFO: 19epoch:train:1-100batch: iter_time=3.360, forward_time=0.134, loss_ctc=71.878, loss_att=52.487, acc=0.687, loss=58.305, backward_time=0.768, grad_norm=98.210, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.115, optim0_lr0=8.574e-05, train_time=8.830 +[gpua003:0/64] 2023-07-06 18:12:13,736 (trainer:732) INFO: 19epoch:train:101-200batch: iter_time=1.005e-04, forward_time=0.108, loss_ctc=73.951, loss_att=54.611, acc=0.685, loss=60.413, backward_time=0.752, grad_norm=97.472, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.571e-05, train_time=2.008 +[gpua003:0/64] 2023-07-06 18:13:54,017 (trainer:732) INFO: 19epoch:train:201-300batch: iter_time=9.643e-05, forward_time=0.108, loss_ctc=71.382, loss_att=53.617, acc=0.688, loss=58.947, backward_time=0.750, grad_norm=79.712, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.112, optim0_lr0=8.569e-05, train_time=2.005 +[gpua003:0/64] 2023-07-06 18:15:36,625 (trainer:732) INFO: 19epoch:train:301-400batch: iter_time=9.724e-05, forward_time=0.108, loss_ctc=75.527, loss_att=61.014, acc=0.673, loss=65.368, backward_time=0.757, grad_norm=94.346, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.566e-05, train_time=2.052 +[gpua003:0/64] 2023-07-06 18:17:17,089 (trainer:732) INFO: 19epoch:train:401-500batch: iter_time=9.887e-05, forward_time=0.109, loss_ctc=73.023, loss_att=58.274, acc=0.684, loss=62.699, backward_time=0.751, grad_norm=90.553, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.564e-05, train_time=2.009 +[gpua003:0/64] 2023-07-06 18:19:01,828 (trainer:732) INFO: 19epoch:train:501-600batch: iter_time=8.940e-05, forward_time=0.108, loss_ctc=71.310, loss_att=57.750, acc=0.697, loss=61.818, backward_time=0.756, grad_norm=100.266, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.561e-05, train_time=2.095 +[gpua003:0/64] 2023-07-06 18:21:04,708 (trainer:732) INFO: 19epoch:train:601-700batch: iter_time=9.789e-05, forward_time=0.110, loss_ctc=68.405, loss_att=52.910, acc=0.711, loss=57.558, backward_time=0.796, grad_norm=82.647, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.112, optim0_lr0=8.559e-05, train_time=2.457 +[gpua003:0/64] 2023-07-06 18:22:58,725 (trainer:732) INFO: 19epoch:train:701-800batch: iter_time=1.057e-04, forward_time=0.111, loss_ctc=85.051, loss_att=69.937, acc=0.676, loss=74.471, backward_time=0.799, grad_norm=123.148, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.556e-05, 
train_time=2.280 +[gpua003:0/64] 2023-07-06 18:23:39,249 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpua003:0/64] 2023-07-06 18:23:57,976 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 18:24:01,673 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 18:24:01,673 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpua003:0/64] 2023-07-06 18:24:01,679 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 18:27:57,894 (trainer:732) INFO: 19epoch:train:801-900batch: iter_time=1.349, forward_time=0.109, loss_ctc=74.949, loss_att=56.631, acc=0.679, loss=62.126, backward_time=0.766, grad_norm=85.017, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.112, optim0_lr0=8.553e-05, train_time=5.983 +[gpua003:0/64] 2023-07-06 18:29:38,263 (trainer:732) INFO: 19epoch:train:901-1000batch: iter_time=1.127e-04, forward_time=0.108, loss_ctc=74.503, loss_att=54.158, acc=0.674, loss=60.262, backward_time=0.751, grad_norm=102.346, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.551e-05, train_time=2.007 +[gpua003:0/64] 2023-07-06 18:31:18,016 (trainer:732) INFO: 19epoch:train:1001-1100batch: iter_time=1.100e-04, forward_time=0.108, loss_ctc=74.767, loss_att=56.937, acc=0.684, loss=62.286, backward_time=0.752, grad_norm=82.548, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.548e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 18:32:57,536 (trainer:732) INFO: 19epoch:train:1101-1200batch: iter_time=1.159e-04, forward_time=0.107, loss_ctc=70.675, loss_att=55.035, acc=0.670, loss=59.727, backward_time=0.752, grad_norm=94.405, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.546e-05, train_time=1.990 +[gpua003:0/64] 2023-07-06 18:34:37,089 (trainer:732) INFO: 19epoch:train:1201-1300batch: iter_time=1.106e-04, forward_time=0.107, loss_ctc=76.324, loss_att=59.642, acc=0.686, loss=64.647, backward_time=0.751, grad_norm=93.883, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.112, optim0_lr0=8.544e-05, train_time=1.991 +[gpua003:0/64] 2023-07-06 18:36:17,028 (trainer:732) INFO: 19epoch:train:1301-1400batch: iter_time=9.751e-05, forward_time=0.109, loss_ctc=67.885, loss_att=54.884, acc=0.685, loss=58.784, backward_time=0.753, grad_norm=91.475, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.541e-05, train_time=1.999 +[gpua003:0/64] 2023-07-06 18:37:56,460 (trainer:732) INFO: 19epoch:train:1401-1500batch: iter_time=1.118e-04, forward_time=0.107, loss_ctc=66.717, loss_att=53.410, acc=0.694, loss=57.402, backward_time=0.750, grad_norm=80.970, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.539e-05, train_time=1.988 +[gpua003:0/64] 2023-07-06 18:39:36,031 (trainer:732) INFO: 19epoch:train:1501-1600batch: iter_time=8.864e-05, forward_time=0.108, loss_ctc=81.647, loss_att=67.344, acc=0.685, loss=71.635, 
backward_time=0.750, grad_norm=114.722, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.536e-05, train_time=1.991 +[gpua003:0/64] 2023-07-06 18:40:44,261 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpua003:0/64] 2023-07-06 18:41:03,688 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 18:41:07,475 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 18:41:07,475 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpua003:0/64] 2023-07-06 18:41:07,482 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 18:45:41,292 (trainer:732) INFO: 19epoch:train:1601-1700batch: iter_time=1.309, forward_time=0.108, loss_ctc=77.277, loss_att=60.983, acc=0.668, loss=65.871, backward_time=0.768, grad_norm=93.748, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.534e-05, train_time=7.305 +[gpua003:0/64] 2023-07-06 18:47:21,491 (trainer:732) INFO: 19epoch:train:1701-1800batch: iter_time=1.143e-04, forward_time=0.109, loss_ctc=73.968, loss_att=51.681, acc=0.683, loss=58.367, backward_time=0.753, grad_norm=97.133, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.531e-05, train_time=2.004 +[gpua003:0/64] 2023-07-06 18:49:01,195 (trainer:732) INFO: 19epoch:train:1801-1900batch: iter_time=1.050e-04, forward_time=0.108, loss_ctc=73.852, loss_att=56.822, acc=0.683, loss=61.931, backward_time=0.751, grad_norm=95.195, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.529e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 18:50:40,963 (trainer:732) INFO: 19epoch:train:1901-2000batch: iter_time=1.119e-04, forward_time=0.109, loss_ctc=68.398, loss_att=50.868, acc=0.690, loss=56.127, backward_time=0.752, grad_norm=82.918, clip=100.000, loss_scale=2.882e+17, optim_step_time=0.113, optim0_lr0=8.526e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 18:52:20,494 (trainer:732) INFO: 19epoch:train:2001-2100batch: iter_time=1.182e-04, forward_time=0.108, loss_ctc=72.453, loss_att=55.434, acc=0.682, loss=60.540, backward_time=0.750, grad_norm=91.971, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.524e-05, train_time=1.990 +[gpua003:0/64] 2023-07-06 18:54:00,169 (trainer:732) INFO: 19epoch:train:2101-2200batch: iter_time=1.126e-04, forward_time=0.109, loss_ctc=70.471, loss_att=60.656, acc=0.681, loss=63.600, backward_time=0.752, grad_norm=92.734, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.521e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 18:55:43,499 (trainer:732) INFO: 19epoch:train:2201-2300batch: iter_time=1.184e-04, forward_time=0.109, loss_ctc=70.887, loss_att=55.798, acc=0.691, loss=60.325, backward_time=0.753, grad_norm=87.370, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.519e-05, train_time=2.066 +[gpua003:0/64] 2023-07-06 18:57:28,468 (trainer:732) INFO: 
19epoch:train:2301-2400batch: iter_time=1.142e-04, forward_time=0.108, loss_ctc=74.302, loss_att=62.084, acc=0.687, loss=65.750, backward_time=0.757, grad_norm=90.024, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.516e-05, train_time=2.099 +[gpua003:0/64] 2023-07-06 18:59:09,045 (trainer:732) INFO: 19epoch:train:2401-2500batch: iter_time=9.995e-05, forward_time=0.109, loss_ctc=83.902, loss_att=62.709, acc=0.676, loss=69.067, backward_time=0.757, grad_norm=139.737, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.514e-05, train_time=2.011 +[gpua003:0/64] 2023-07-06 18:59:11,328 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpua003:0/64] 2023-07-06 18:59:30,666 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 18:59:34,430 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 18:59:34,430 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpua003:0/64] 2023-07-06 18:59:34,436 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 19:05:02,322 (trainer:732) INFO: 19epoch:train:2501-2600batch: iter_time=1.326, forward_time=0.108, loss_ctc=70.364, loss_att=51.755, acc=0.683, loss=57.338, backward_time=0.793, grad_norm=99.243, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.511e-05, train_time=7.065 +[gpua003:0/64] 2023-07-06 19:06:42,318 (trainer:732) INFO: 19epoch:train:2601-2700batch: iter_time=1.063e-04, forward_time=0.108, loss_ctc=73.763, loss_att=53.729, acc=0.683, loss=59.739, backward_time=0.752, grad_norm=99.427, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.509e-05, train_time=2.000 +[gpua003:0/64] 2023-07-06 19:08:21,986 (trainer:732) INFO: 19epoch:train:2701-2800batch: iter_time=9.496e-05, forward_time=0.107, loss_ctc=69.928, loss_att=54.179, acc=0.687, loss=58.904, backward_time=0.752, grad_norm=87.933, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.506e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 19:10:01,778 (trainer:732) INFO: 19epoch:train:2801-2900batch: iter_time=9.847e-05, forward_time=0.108, loss_ctc=70.824, loss_att=55.649, acc=0.683, loss=60.201, backward_time=0.753, grad_norm=104.518, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.504e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 19:11:43,478 (trainer:732) INFO: 19epoch:train:2901-3000batch: iter_time=8.790e-05, forward_time=0.108, loss_ctc=71.744, loss_att=56.572, acc=0.683, loss=61.124, backward_time=0.753, grad_norm=97.646, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.501e-05, train_time=2.034 +[gpua003:0/64] 2023-07-06 19:13:23,321 (trainer:732) INFO: 19epoch:train:3001-3100batch: iter_time=8.518e-05, forward_time=0.108, loss_ctc=68.969, loss_att=56.135, acc=0.697, loss=59.985, backward_time=0.752, grad_norm=93.645, clip=100.000, 
loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.499e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 19:15:02,845 (trainer:732) INFO: 19epoch:train:3101-3200batch: iter_time=9.418e-05, forward_time=0.107, loss_ctc=66.684, loss_att=49.410, acc=0.712, loss=54.592, backward_time=0.751, grad_norm=86.075, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.496e-05, train_time=1.990 +[gpua003:0/64] 2023-07-06 19:16:42,465 (trainer:732) INFO: 19epoch:train:3201-3300batch: iter_time=1.012e-04, forward_time=0.108, loss_ctc=83.199, loss_att=70.634, acc=0.670, loss=74.404, backward_time=0.751, grad_norm=111.083, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.494e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 19:17:18,787 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpua003:0/64] 2023-07-06 19:17:38,120 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 19:17:41,665 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 19:17:41,665 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpua003:0/64] 2023-07-06 19:17:41,672 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 19:23:47,034 (trainer:732) INFO: 19epoch:train:3301-3400batch: iter_time=3.115, forward_time=0.165, loss_ctc=73.481, loss_att=53.362, acc=0.685, loss=59.397, backward_time=0.768, grad_norm=89.020, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.115, optim0_lr0=8.492e-05, train_time=8.491 +[gpua003:0/64] 2023-07-06 19:25:27,393 (trainer:732) INFO: 19epoch:train:3401-3500batch: iter_time=1.015e-04, forward_time=0.109, loss_ctc=74.841, loss_att=53.947, acc=0.678, loss=60.215, backward_time=0.753, grad_norm=92.488, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.489e-05, train_time=2.007 +[gpua003:0/64] 2023-07-06 19:27:17,900 (trainer:732) INFO: 19epoch:train:3501-3600batch: iter_time=8.651e-05, forward_time=0.108, loss_ctc=73.019, loss_att=56.463, acc=0.684, loss=61.429, backward_time=0.762, grad_norm=97.312, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.487e-05, train_time=2.210 +[gpua003:0/64] 2023-07-06 19:28:57,771 (trainer:732) INFO: 19epoch:train:3601-3700batch: iter_time=9.262e-05, forward_time=0.108, loss_ctc=70.917, loss_att=54.082, acc=0.678, loss=59.133, backward_time=0.753, grad_norm=99.327, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.484e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 19:30:37,491 (trainer:732) INFO: 19epoch:train:3701-3800batch: iter_time=1.079e-04, forward_time=0.107, loss_ctc=73.384, loss_att=57.937, acc=0.690, loss=62.571, backward_time=0.750, grad_norm=93.510, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.482e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 19:32:17,208 (trainer:732) INFO: 19epoch:train:3801-3900batch: iter_time=1.142e-04, 
forward_time=0.108, loss_ctc=67.881, loss_att=55.385, acc=0.682, loss=59.133, backward_time=0.751, grad_norm=104.375, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.479e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 19:33:56,953 (trainer:732) INFO: 19epoch:train:3901-4000batch: iter_time=1.101e-04, forward_time=0.108, loss_ctc=66.750, loss_att=52.965, acc=0.698, loss=57.101, backward_time=0.752, grad_norm=101.994, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.477e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 19:35:36,807 (trainer:732) INFO: 19epoch:train:4001-4100batch: iter_time=9.853e-05, forward_time=0.108, loss_ctc=78.586, loss_att=65.422, acc=0.693, loss=69.371, backward_time=0.753, grad_norm=90.671, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.475e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 19:36:46,521 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpua003:0/64] 2023-07-06 19:37:05,501 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 19:37:09,017 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 19:37:09,017 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpua003:0/64] 2023-07-06 19:37:09,023 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 19:42:01,856 (trainer:732) INFO: 19epoch:train:4101-4200batch: iter_time=1.271, forward_time=0.108, loss_ctc=77.470, loss_att=59.834, acc=0.674, loss=65.125, backward_time=0.764, grad_norm=104.225, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.472e-05, train_time=7.701 +[gpua003:0/64] 2023-07-06 19:43:42,839 (trainer:732) INFO: 19epoch:train:4201-4300batch: iter_time=9.875e-05, forward_time=0.108, loss_ctc=72.007, loss_att=52.479, acc=0.697, loss=58.337, backward_time=0.754, grad_norm=104.085, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.470e-05, train_time=2.019 +[gpua003:0/64] 2023-07-06 19:45:25,413 (trainer:732) INFO: 19epoch:train:4301-4400batch: iter_time=8.662e-05, forward_time=0.108, loss_ctc=75.337, loss_att=58.219, acc=0.688, loss=63.354, backward_time=0.759, grad_norm=99.286, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.467e-05, train_time=2.051 +[gpua003:0/64] 2023-07-06 19:47:05,267 (trainer:732) INFO: 19epoch:train:4401-4500batch: iter_time=1.128e-04, forward_time=0.108, loss_ctc=68.687, loss_att=50.921, acc=0.699, loss=56.251, backward_time=0.752, grad_norm=83.793, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.465e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 19:48:45,031 (trainer:732) INFO: 19epoch:train:4501-4600batch: iter_time=1.138e-04, forward_time=0.109, loss_ctc=66.702, loss_att=53.486, acc=0.690, loss=57.451, backward_time=0.751, grad_norm=87.055, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.462e-05, 
train_time=1.995 +[gpua003:0/64] 2023-07-06 19:50:28,601 (trainer:732) INFO: 19epoch:train:4601-4700batch: iter_time=1.079e-04, forward_time=0.108, loss_ctc=72.286, loss_att=61.308, acc=0.686, loss=64.601, backward_time=0.756, grad_norm=99.764, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.460e-05, train_time=2.071 +[gpua003:0/64] 2023-07-06 19:52:08,198 (trainer:732) INFO: 19epoch:train:4701-4800batch: iter_time=1.036e-04, forward_time=0.108, loss_ctc=71.867, loss_att=54.172, acc=0.705, loss=59.481, backward_time=0.750, grad_norm=109.482, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.458e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 19:53:47,911 (trainer:732) INFO: 19epoch:train:4801-4900batch: iter_time=1.030e-04, forward_time=0.108, loss_ctc=73.498, loss_att=60.646, acc=0.710, loss=64.501, backward_time=0.751, grad_norm=132.193, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.455e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 19:55:27,532 (trainer:732) INFO: 19epoch:train:4901-5000batch: iter_time=1.088e-04, forward_time=0.108, loss_ctc=81.536, loss_att=64.509, acc=0.679, loss=69.617, backward_time=0.751, grad_norm=111.664, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.453e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 19:55:30,048 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpua003:0/64] 2023-07-06 19:55:48,920 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 19:55:52,427 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 19:55:52,427 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpua003:0/64] 2023-07-06 19:55:52,433 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 20:00:40,776 (trainer:732) INFO: 19epoch:train:5001-5100batch: iter_time=1.366, forward_time=0.108, loss_ctc=69.849, loss_att=51.729, acc=0.695, loss=57.165, backward_time=0.763, grad_norm=87.510, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.450e-05, train_time=6.265 +[gpua003:0/64] 2023-07-06 20:02:20,937 (trainer:732) INFO: 19epoch:train:5101-5200batch: iter_time=9.291e-05, forward_time=0.108, loss_ctc=70.810, loss_att=51.826, acc=0.701, loss=57.521, backward_time=0.752, grad_norm=89.983, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.448e-05, train_time=2.003 +[gpua003:0/64] 2023-07-06 20:04:03,241 (trainer:732) INFO: 19epoch:train:5201-5300batch: iter_time=9.771e-05, forward_time=0.109, loss_ctc=70.882, loss_att=52.510, acc=0.700, loss=58.021, backward_time=0.755, grad_norm=78.370, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.445e-05, train_time=2.046 +[gpua003:0/64] 2023-07-06 20:05:51,446 (trainer:732) INFO: 19epoch:train:5301-5400batch: iter_time=9.570e-05, forward_time=0.107, loss_ctc=69.962, loss_att=57.223, acc=0.687, loss=61.045, 
backward_time=0.760, grad_norm=91.273, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.443e-05, train_time=2.164 +[gpua003:0/64] 2023-07-06 20:07:31,682 (trainer:732) INFO: 19epoch:train:5401-5500batch: iter_time=9.179e-05, forward_time=0.107, loss_ctc=70.911, loss_att=55.955, acc=0.689, loss=60.442, backward_time=0.751, grad_norm=91.242, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.441e-05, train_time=2.005 +[gpua003:0/64] 2023-07-06 20:09:13,295 (trainer:732) INFO: 19epoch:train:5501-5600batch: iter_time=9.333e-05, forward_time=0.107, loss_ctc=72.418, loss_att=59.104, acc=0.701, loss=63.098, backward_time=0.754, grad_norm=97.947, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.438e-05, train_time=2.032 +[gpua003:0/64] 2023-07-06 20:10:53,158 (trainer:732) INFO: 19epoch:train:5601-5700batch: iter_time=9.882e-05, forward_time=0.107, loss_ctc=66.916, loss_att=50.106, acc=0.718, loss=55.149, backward_time=0.751, grad_norm=81.033, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.436e-05, train_time=1.997 +[gpua003:0/64] 2023-07-06 20:12:34,924 (trainer:732) INFO: 19epoch:train:5701-5800batch: iter_time=9.385e-05, forward_time=0.108, loss_ctc=82.933, loss_att=68.684, acc=0.688, loss=72.959, backward_time=0.756, grad_norm=95.394, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.112, optim0_lr0=8.433e-05, train_time=2.035 +[gpua003:0/64] 2023-07-06 20:13:11,362 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpua003:0/64] 2023-07-06 20:13:30,240 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 20:13:33,729 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 20:13:33,729 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpua003:0/64] 2023-07-06 20:13:33,736 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 20:17:25,315 (trainer:732) INFO: 19epoch:train:5801-5900batch: iter_time=1.332, forward_time=0.109, loss_ctc=72.517, loss_att=53.535, acc=0.694, loss=59.230, backward_time=0.764, grad_norm=91.357, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.431e-05, train_time=5.808 +[gpua003:0/64] 2023-07-06 20:19:05,549 (trainer:732) INFO: 19epoch:train:5901-6000batch: iter_time=9.979e-05, forward_time=0.108, loss_ctc=72.968, loss_att=53.312, acc=0.689, loss=59.209, backward_time=0.753, grad_norm=105.818, clip=100.000, loss_scale=5.765e+17, optim_step_time=0.113, optim0_lr0=8.429e-05, train_time=2.004 +[gpua003:0/64] 2023-07-06 20:20:45,322 (trainer:732) INFO: 19epoch:train:6001-6100batch: iter_time=1.047e-04, forward_time=0.109, loss_ctc=72.912, loss_att=54.448, acc=0.697, loss=59.987, backward_time=0.751, grad_norm=86.049, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.426e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 20:22:41,206 (trainer:732) INFO: 
19epoch:train:6101-6200batch: iter_time=9.789e-05, forward_time=0.115, loss_ctc=67.667, loss_att=52.369, acc=0.694, loss=56.958, backward_time=0.773, grad_norm=79.432, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.114, optim0_lr0=8.424e-05, train_time=2.317 +[gpua003:0/64] 2023-07-06 20:24:21,811 (trainer:732) INFO: 19epoch:train:6201-6300batch: iter_time=1.002e-04, forward_time=0.109, loss_ctc=72.569, loss_att=58.922, acc=0.692, loss=63.016, backward_time=0.756, grad_norm=114.121, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.421e-05, train_time=2.012 +[gpua003:0/64] 2023-07-06 20:26:02,019 (trainer:732) INFO: 19epoch:train:6301-6400batch: iter_time=1.042e-04, forward_time=0.110, loss_ctc=67.317, loss_att=54.258, acc=0.696, loss=58.176, backward_time=0.753, grad_norm=86.928, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.419e-05, train_time=2.004 +[gpua003:0/64] 2023-07-06 20:27:41,637 (trainer:732) INFO: 19epoch:train:6401-6500batch: iter_time=1.161e-04, forward_time=0.108, loss_ctc=66.587, loss_att=54.003, acc=0.706, loss=57.778, backward_time=0.750, grad_norm=93.696, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.417e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 20:29:21,369 (trainer:732) INFO: 19epoch:train:6501-6600batch: iter_time=1.061e-04, forward_time=0.109, loss_ctc=78.539, loss_att=62.535, acc=0.711, loss=67.336, backward_time=0.751, grad_norm=87.737, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.414e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 20:30:33,131 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpua003:0/64] 2023-07-06 20:30:52,251 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 20:30:55,813 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 20:30:55,813 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpua003:0/64] 2023-07-06 20:30:55,820 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 20:35:42,565 (trainer:732) INFO: 19epoch:train:6601-6700batch: iter_time=1.437, forward_time=0.110, loss_ctc=76.173, loss_att=59.992, acc=0.683, loss=64.847, backward_time=0.771, grad_norm=92.415, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.412e-05, train_time=7.624 +[gpua003:0/64] 2023-07-06 20:37:35,033 (trainer:732) INFO: 19epoch:train:6701-6800batch: iter_time=2.117e-04, forward_time=0.111, loss_ctc=69.258, loss_att=50.177, acc=0.703, loss=55.902, backward_time=0.765, grad_norm=87.979, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.410e-05, train_time=2.249 +[gpua003:0/64] 2023-07-06 20:39:33,155 (trainer:732) INFO: 19epoch:train:6801-6900batch: iter_time=8.933e-05, forward_time=0.110, loss_ctc=76.772, loss_att=57.481, acc=0.692, loss=63.268, backward_time=0.809, grad_norm=113.312, clip=100.000, loss_scale=1.153e+18, 
optim_step_time=0.113, optim0_lr0=8.407e-05, train_time=2.362 +[gpua003:0/64] 2023-07-06 20:41:18,407 (trainer:732) INFO: 19epoch:train:6901-7000batch: iter_time=8.764e-05, forward_time=0.107, loss_ctc=69.838, loss_att=51.828, acc=0.699, loss=57.231, backward_time=0.770, grad_norm=80.826, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.405e-05, train_time=2.105 +[gpua003:0/64] 2023-07-06 20:43:01,811 (trainer:732) INFO: 19epoch:train:7001-7100batch: iter_time=8.895e-05, forward_time=0.108, loss_ctc=65.767, loss_att=52.168, acc=0.697, loss=56.247, backward_time=0.757, grad_norm=105.272, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.402e-05, train_time=2.068 +[gpua003:0/64] 2023-07-06 20:44:41,509 (trainer:732) INFO: 19epoch:train:7101-7200batch: iter_time=9.980e-05, forward_time=0.108, loss_ctc=71.601, loss_att=60.582, acc=0.689, loss=63.888, backward_time=0.751, grad_norm=87.586, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.400e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 20:46:21,090 (trainer:732) INFO: 19epoch:train:7201-7300batch: iter_time=1.134e-04, forward_time=0.108, loss_ctc=70.650, loss_att=53.644, acc=0.707, loss=58.745, backward_time=0.750, grad_norm=87.476, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.398e-05, train_time=1.991 +[gpua003:0/64] 2023-07-06 20:48:02,044 (trainer:732) INFO: 19epoch:train:7301-7400batch: iter_time=8.952e-05, forward_time=0.108, loss_ctc=72.244, loss_att=60.016, acc=0.710, loss=63.685, backward_time=0.752, grad_norm=87.451, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.395e-05, train_time=2.019 +[gpua003:0/64] 2023-07-06 20:49:41,828 (trainer:732) INFO: 19epoch:train:7401-7500batch: iter_time=8.351e-05, forward_time=0.108, loss_ctc=78.571, loss_att=61.691, acc=0.689, loss=66.755, backward_time=0.751, grad_norm=104.440, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.393e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 20:49:52,973 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
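The loss_scale column in these records doubles periodically (1.441e+17 -> 2.882e+17 -> 5.765e+17 -> 1.153e+18 over epochs 18-19) and never shrinks in this stretch, which is the signature of dynamic loss scaling in mixed-precision training: the scaler doubles the scale after a fixed run of overflow-free optimizer steps and backs off when gradients overflow. The cadence here, one doubling per ~4,000 logged batches, would line up with PyTorch GradScaler defaults (growth_factor=2.0, growth_interval=2000) if two batches were accumulated per optimizer step; the accumulation setting is not shown in this log, so that is an inference. A generic sketch of the mechanism (not ESPnet's trainer code):

```python
import torch

# Sketch of the dynamic loss scaling visible in the loss_scale column.
# GradScaler doubles the scale after `growth_interval` consecutive
# non-overflowing steps and multiplies it by `backoff_factor` on overflow.
scaler = torch.cuda.amp.GradScaler(
    init_scale=2.0**16,    # this run's actual initial scale is not logged
    growth_factor=2.0,     # matches the observed doubling
    backoff_factor=0.5,
    growth_interval=2000,  # default; consistent with the observed cadence
)

def train_step(model, optimizer, batch, targets, loss_fn):
    optimizer.zero_grad(set_to_none=True)
    with torch.autocast(device_type="cuda", dtype=torch.float16):
        loss = loss_fn(model(batch), targets)
    scaler.scale(loss).backward()  # backprop on loss * scale
    scaler.step(optimizer)         # unscales grads, skips step on inf/nan
    scaler.update()                # grows or backs off the scale
```

Relatedly, the "Building Nth iter-factory" messages land near batch 833*N within each 10,000-batch epoch: with --multiple_iterator true the epoch is spread over the 12 splits12 shards, ~10000/12 = 833 batches per shard, which matches the spacing of the builds above (after batches 800, 1600, 2500, 3300, ... in epoch 19).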
+[gpua003:0/64] 2023-07-06 20:50:12,449 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 20:50:15,993 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 20:50:15,993 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpua003:0/64] 2023-07-06 20:50:15,999 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 20:54:12,385 (trainer:732) INFO: 19epoch:train:7501-7600batch: iter_time=1.571, forward_time=0.130, loss_ctc=72.069, loss_att=54.503, acc=0.682, loss=59.773, backward_time=0.763, grad_norm=94.845, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.114, optim0_lr0=8.391e-05, train_time=5.411 +[gpua003:0/64] 2023-07-06 20:55:52,369 (trainer:732) INFO: 19epoch:train:7601-7700batch: iter_time=9.053e-05, forward_time=0.107, loss_ctc=69.256, loss_att=49.020, acc=0.700, loss=55.091, backward_time=0.751, grad_norm=86.882, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.388e-05, train_time=1.999 +[gpua003:0/64] 2023-07-06 20:57:32,499 (trainer:732) INFO: 19epoch:train:7701-7800batch: iter_time=8.971e-05, forward_time=0.107, loss_ctc=71.084, loss_att=55.278, acc=0.687, loss=60.020, backward_time=0.750, grad_norm=85.779, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.386e-05, train_time=2.002 +[gpua003:0/64] 2023-07-06 20:59:12,329 (trainer:732) INFO: 19epoch:train:7801-7900batch: iter_time=1.121e-04, forward_time=0.109, loss_ctc=69.918, loss_att=55.616, acc=0.691, loss=59.906, backward_time=0.752, grad_norm=90.907, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.383e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 21:00:52,131 (trainer:732) INFO: 19epoch:train:7901-8000batch: iter_time=1.091e-04, forward_time=0.110, loss_ctc=72.031, loss_att=58.828, acc=0.687, loss=62.788, backward_time=0.752, grad_norm=90.990, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.381e-05, train_time=1.996 +[gpua003:0/64] 2023-07-06 21:02:31,776 (trainer:732) INFO: 19epoch:train:8001-8100batch: iter_time=1.210e-04, forward_time=0.109, loss_ctc=69.895, loss_att=59.175, acc=0.679, loss=62.391, backward_time=0.751, grad_norm=96.311, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.379e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 21:04:11,333 (trainer:732) INFO: 19epoch:train:8101-8200batch: iter_time=1.196e-04, forward_time=0.109, loss_ctc=65.075, loss_att=46.957, acc=0.713, loss=52.393, backward_time=0.752, grad_norm=79.989, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.376e-05, train_time=1.991 +[gpua003:0/64] 2023-07-06 21:05:50,952 (trainer:732) INFO: 19epoch:train:8201-8300batch: iter_time=1.100e-04, forward_time=0.109, loss_ctc=83.512, loss_att=69.549, acc=0.682, loss=73.738, backward_time=0.752, grad_norm=92.984, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.374e-05, 
train_time=1.992 +[gpua003:0/64] 2023-07-06 21:06:26,463 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpua003:0/64] 2023-07-06 21:06:45,859 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 21:06:49,639 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 21:06:49,639 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpua003:0/64] 2023-07-06 21:06:49,645 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 21:10:21,900 (trainer:732) INFO: 19epoch:train:8301-8400batch: iter_time=1.289, forward_time=0.109, loss_ctc=73.988, loss_att=57.572, acc=0.681, loss=62.497, backward_time=0.774, grad_norm=92.924, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.372e-05, train_time=5.419 +[gpua003:0/64] 2023-07-06 21:12:03,025 (trainer:732) INFO: 19epoch:train:8401-8500batch: iter_time=9.593e-05, forward_time=0.108, loss_ctc=71.684, loss_att=50.782, acc=0.691, loss=57.053, backward_time=0.753, grad_norm=91.974, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.369e-05, train_time=2.022 +[gpua003:0/64] 2023-07-06 21:13:42,791 (trainer:732) INFO: 19epoch:train:8501-8600batch: iter_time=8.646e-05, forward_time=0.109, loss_ctc=74.130, loss_att=56.299, acc=0.692, loss=61.648, backward_time=0.753, grad_norm=87.929, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.367e-05, train_time=1.995 +[gpua003:0/64] 2023-07-06 21:15:23,522 (trainer:732) INFO: 19epoch:train:8601-8700batch: iter_time=9.629e-05, forward_time=0.109, loss_ctc=68.342, loss_att=52.472, acc=0.685, loss=57.233, backward_time=0.753, grad_norm=89.721, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.365e-05, train_time=2.014 +[gpua003:0/64] 2023-07-06 21:17:03,161 (trainer:732) INFO: 19epoch:train:8701-8800batch: iter_time=9.716e-05, forward_time=0.108, loss_ctc=72.982, loss_att=57.376, acc=0.697, loss=62.058, backward_time=0.751, grad_norm=113.686, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.362e-05, train_time=1.993 +[gpua003:0/64] 2023-07-06 21:18:43,104 (trainer:732) INFO: 19epoch:train:8801-8900batch: iter_time=9.576e-05, forward_time=0.109, loss_ctc=66.077, loss_att=53.057, acc=0.696, loss=56.963, backward_time=0.753, grad_norm=81.859, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.360e-05, train_time=1.999 +[gpua003:0/64] 2023-07-06 21:20:22,718 (trainer:732) INFO: 19epoch:train:8901-9000batch: iter_time=1.008e-04, forward_time=0.108, loss_ctc=65.904, loss_att=53.143, acc=0.700, loss=56.972, backward_time=0.751, grad_norm=84.308, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.358e-05, train_time=1.992 +[gpua003:0/64] 2023-07-06 21:22:02,446 (trainer:732) INFO: 19epoch:train:9001-9100batch: iter_time=9.656e-05, forward_time=0.108, loss_ctc=77.892, loss_att=65.356, acc=0.694, loss=69.117, 
backward_time=0.752, grad_norm=101.891, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.355e-05, train_time=1.994 +[gpua003:0/64] 2023-07-06 21:23:10,807 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpua003:0/64] 2023-07-06 21:23:29,796 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-06 21:23:33,339 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-06 21:23:33,339 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpua003:0/64] 2023-07-06 21:23:33,345 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-06 21:27:24,475 (trainer:732) INFO: 19epoch:train:9101-9200batch: iter_time=1.327, forward_time=0.109, loss_ctc=75.034, loss_att=57.612, acc=0.680, loss=62.839, backward_time=0.762, grad_norm=88.253, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.353e-05, train_time=6.440 +[gpua003:0/64] 2023-07-06 21:29:05,161 (trainer:732) INFO: 19epoch:train:9201-9300batch: iter_time=9.736e-05, forward_time=0.108, loss_ctc=71.238, loss_att=51.142, acc=0.705, loss=57.170, backward_time=0.754, grad_norm=81.302, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.351e-05, train_time=2.013 +[gpua003:0/64] 2023-07-06 21:30:46,142 (trainer:732) INFO: 19epoch:train:9301-9400batch: iter_time=1.061e-04, forward_time=0.109, loss_ctc=74.369, loss_att=57.598, acc=0.690, loss=62.630, backward_time=0.752, grad_norm=101.798, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.348e-05, train_time=2.019 +[gpua003:0/64] 2023-07-06 21:32:26,233 (trainer:732) INFO: 19epoch:train:9401-9500batch: iter_time=9.370e-05, forward_time=0.109, loss_ctc=67.835, loss_att=50.281, acc=0.706, loss=55.547, backward_time=0.752, grad_norm=88.722, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.346e-05, train_time=2.002 +[gpua003:0/64] 2023-07-06 21:34:06,640 (trainer:732) INFO: 19epoch:train:9501-9600batch: iter_time=1.055e-04, forward_time=0.110, loss_ctc=64.836, loss_att=52.472, acc=0.696, loss=56.181, backward_time=0.753, grad_norm=90.882, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.344e-05, train_time=2.008 +[gpua003:0/64] 2023-07-06 21:36:03,018 (trainer:732) INFO: 19epoch:train:9601-9700batch: iter_time=7.040e-04, forward_time=0.150, loss_ctc=70.591, loss_att=59.315, acc=0.695, loss=62.698, backward_time=0.779, grad_norm=92.334, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.114, optim0_lr0=8.341e-05, train_time=2.327 +[gpua003:0/64] 2023-07-06 21:37:44,980 (trainer:732) INFO: 19epoch:train:9701-9800batch: iter_time=1.046e-04, forward_time=0.108, loss_ctc=69.523, loss_att=55.550, acc=0.707, loss=59.741, backward_time=0.753, grad_norm=96.147, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.339e-05, train_time=2.039 +[gpua003:0/64] 2023-07-06 21:39:24,859 (trainer:732) INFO: 
19epoch:train:9801-9900batch: iter_time=1.053e-04, forward_time=0.110, loss_ctc=73.187, loss_att=59.236, acc=0.717, loss=63.421, backward_time=0.753, grad_norm=85.059, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.113, optim0_lr0=8.337e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 21:41:04,508 (trainer:732) INFO: 19epoch:train:9901-10000batch: iter_time=9.273e-05, forward_time=0.108, loss_ctc=79.916, loss_att=59.698, acc=0.692, loss=65.764, backward_time=0.751, grad_norm=87.679, clip=100.000, loss_scale=1.153e+18, optim_step_time=0.112, optim0_lr0=8.334e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 21:53:26,999 (trainer:338) INFO: 19epoch results: [train] iter_time=0.201, forward_time=0.110, loss_ctc=72.442, loss_att=56.449, acc=0.691, loss=61.247, backward_time=0.757, grad_norm=94.422, clip=100.000, loss_scale=7.494e+17, optim_step_time=0.113, optim0_lr0=8.452e-05, train_time=2.614, time=3 hours, 38 minutes and 13.18 seconds, total_count=160000, gpu_max_cached_mem_GB=37.779, [valid] loss_ctc=50.578, cer_ctc=0.291, loss_att=43.395, acc=0.637, cer=0.442, wer=1.000, loss=45.550, time=5 minutes and 44.01 seconds, total_count=16698, gpu_max_cached_mem_GB=37.779, [att_plot] time=6 minutes and 18.47 seconds, total_count=0, gpu_max_cached_mem_GB=37.779
+[gpua003:0/64] 2023-07-06 21:53:45,891 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpua003:0/64] 2023-07-06 21:53:45,932 (trainer:272) INFO: 20/100epoch started. Estimated time to finish: 1 week, 6 days and 1 hour
+[gpua003:0/64] 2023-07-06 21:53:46,828 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpua003:0/64] 2023-07-06 21:54:05,845 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 21:54:10,947 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 21:54:10,947 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpua003:0/64] 2023-07-06 21:54:11,034 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 22:01:50,792 (trainer:732) INFO: 20epoch:train:1-100batch: iter_time=3.782, forward_time=0.136, loss_ctc=71.854, loss_att=51.948, acc=0.710, loss=57.919, backward_time=0.767, grad_norm=91.560, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.114, optim0_lr0=8.332e-05, train_time=9.690
+[gpua003:0/64] 2023-07-06 22:03:30,984 (trainer:732) INFO: 20epoch:train:101-200batch: iter_time=9.746e-05, forward_time=0.108, loss_ctc=68.269, loss_att=50.073, acc=0.698, loss=55.532, backward_time=0.752, grad_norm=89.704, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.330e-05, train_time=2.004
+[gpua003:0/64] 2023-07-06 22:05:13,770 (trainer:732) INFO: 20epoch:train:201-300batch: iter_time=1.101e-04, forward_time=0.109, loss_ctc=76.182, loss_att=54.086, acc=0.680, loss=60.715, backward_time=0.754, grad_norm=92.398, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.327e-05, train_time=2.056
+[gpua003:0/64] 2023-07-06 22:06:54,574 (trainer:732) INFO: 20epoch:train:301-400batch: iter_time=1.038e-04, forward_time=0.107, loss_ctc=72.772, loss_att=56.293, acc=0.685, loss=61.237, backward_time=0.751, grad_norm=105.066, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.112, optim0_lr0=8.325e-05, train_time=2.016
+[gpua003:0/64] 2023-07-06 22:08:34,689 (trainer:732) INFO: 20epoch:train:401-500batch: iter_time=1.021e-04, forward_time=0.106, loss_ctc=75.522, loss_att=55.332, acc=0.684, loss=61.389, backward_time=0.749, grad_norm=97.945, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.112, optim0_lr0=8.323e-05, train_time=2.002
+[gpua003:0/64] 2023-07-06 22:10:14,287 (trainer:732) INFO: 20epoch:train:501-600batch: iter_time=1.003e-04, forward_time=0.107, loss_ctc=73.512, loss_att=56.609, acc=0.702, loss=61.680, backward_time=0.750, grad_norm=84.685, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.112, optim0_lr0=8.321e-05, train_time=1.992
+[gpua003:0/64] 2023-07-06 22:11:54,452 (trainer:732) INFO: 20epoch:train:601-700batch: iter_time=1.043e-04, forward_time=0.107, loss_ctc=78.698, loss_att=54.562, acc=0.686, loss=61.803, backward_time=0.750, grad_norm=98.035, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.318e-05, train_time=2.003
+[gpua003:0/64] 2023-07-06 22:13:40,823 (trainer:732) INFO: 20epoch:train:701-800batch: iter_time=1.096e-04, forward_time=0.108, loss_ctc=89.847, loss_att=64.589, acc=0.689, loss=72.167, backward_time=0.762, grad_norm=100.675, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.316e-05, train_time=2.127
+[gpua003:0/64] 2023-07-06 22:14:20,761 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpua003:0/64] 2023-07-06 22:14:39,405 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 22:14:43,119 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 22:14:43,119 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpua003:0/64] 2023-07-06 22:14:43,125 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 22:18:32,284 (trainer:732) INFO: 20epoch:train:801-900batch: iter_time=1.362, forward_time=0.108, loss_ctc=71.198, loss_att=53.976, acc=0.705, loss=59.142, backward_time=0.768, grad_norm=80.563, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.112, optim0_lr0=8.314e-05, train_time=5.829
+[gpua003:0/64] 2023-07-06 22:20:13,201 (trainer:732) INFO: 20epoch:train:901-1000batch: iter_time=9.604e-05, forward_time=0.108, loss_ctc=67.658, loss_att=48.534, acc=0.705, loss=54.271, backward_time=0.755, grad_norm=86.115, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.112, optim0_lr0=8.311e-05, train_time=2.018
+[gpua003:0/64] 2023-07-06 22:21:53,069 (trainer:732) INFO: 20epoch:train:1001-1100batch: iter_time=1.027e-04, forward_time=0.108, loss_ctc=75.100, loss_att=54.061, acc=0.692, loss=60.372, backward_time=0.753, grad_norm=94.035, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.309e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 22:23:34,205 (trainer:732) INFO: 20epoch:train:1101-1200batch: iter_time=1.004e-04, forward_time=0.108, loss_ctc=75.994, loss_att=54.925, acc=0.686, loss=61.246, backward_time=0.754, grad_norm=94.166, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.307e-05, train_time=2.022
+[gpua003:0/64] 2023-07-06 22:25:14,385 (trainer:732) INFO: 20epoch:train:1201-1300batch: iter_time=9.688e-05, forward_time=0.108, loss_ctc=68.867, loss_att=52.161, acc=0.694, loss=57.173, backward_time=0.752, grad_norm=88.133, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.114, optim0_lr0=8.304e-05, train_time=2.003
+[gpua003:0/64] 2023-07-06 22:26:54,236 (trainer:732) INFO: 20epoch:train:1301-1400batch: iter_time=9.936e-05, forward_time=0.108, loss_ctc=74.280, loss_att=59.054, acc=0.691, loss=63.622, backward_time=0.753, grad_norm=81.828, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.302e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 22:28:34,189 (trainer:732) INFO: 20epoch:train:1401-1500batch: iter_time=9.366e-05, forward_time=0.108, loss_ctc=72.329, loss_att=51.203, acc=0.701, loss=57.541, backward_time=0.752, grad_norm=88.414, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.114, optim0_lr0=8.300e-05, train_time=1.999
+[gpua003:0/64] 2023-07-06 22:30:15,525 (trainer:732) INFO: 20epoch:train:1501-1600batch: iter_time=1.027e-04, forward_time=0.108, loss_ctc=91.703, loss_att=61.724, acc=0.683, loss=70.718, backward_time=0.754, grad_norm=106.713, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.114, optim0_lr0=8.298e-05, train_time=2.026
+[gpua003:0/64] 2023-07-06 22:31:22,752 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpua003:0/64] 2023-07-06 22:31:42,368 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 22:31:46,184 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 22:31:46,184 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpua003:0/64] 2023-07-06 22:31:46,190 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 22:35:07,262 (trainer:732) INFO: 20epoch:train:1601-1700batch: iter_time=1.291, forward_time=0.108, loss_ctc=71.960, loss_att=56.558, acc=0.711, loss=61.178, backward_time=0.764, grad_norm=84.868, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.295e-05, train_time=5.835
+[gpua003:0/64] 2023-07-06 22:36:48,148 (trainer:732) INFO: 20epoch:train:1701-1800batch: iter_time=1.106e-04, forward_time=0.109, loss_ctc=70.844, loss_att=53.568, acc=0.697, loss=58.751, backward_time=0.755, grad_norm=109.667, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.293e-05, train_time=2.017
+[gpua003:0/64] 2023-07-06 22:38:27,849 (trainer:732) INFO: 20epoch:train:1801-1900batch: iter_time=9.001e-05, forward_time=0.108, loss_ctc=70.234, loss_att=49.729, acc=0.708, loss=55.881, backward_time=0.752, grad_norm=104.621, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.112, optim0_lr0=8.291e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 22:40:07,934 (trainer:732) INFO: 20epoch:train:1901-2000batch: iter_time=9.188e-05, forward_time=0.109, loss_ctc=74.679, loss_att=53.534, acc=0.678, loss=59.877, backward_time=0.753, grad_norm=99.994, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.288e-05, train_time=2.001
+[gpua003:0/64] 2023-07-06 22:41:47,719 (trainer:732) INFO: 20epoch:train:2001-2100batch: iter_time=9.469e-05, forward_time=0.109, loss_ctc=72.452, loss_att=57.103, acc=0.685, loss=61.708, backward_time=0.753, grad_norm=88.374, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.286e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 22:43:27,515 (trainer:732) INFO: 20epoch:train:2101-2200batch: iter_time=9.559e-05, forward_time=0.108, loss_ctc=72.276, loss_att=54.565, acc=0.690, loss=59.878, backward_time=0.753, grad_norm=85.377, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.284e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 22:45:07,265 (trainer:732) INFO: 20epoch:train:2201-2300batch: iter_time=1.133e-04, forward_time=0.109, loss_ctc=70.952, loss_att=53.601, acc=0.697, loss=58.807, backward_time=0.752, grad_norm=91.829, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.282e-05, train_time=1.995
+[gpua003:0/64] 2023-07-06 22:46:46,964 (trainer:732) INFO: 20epoch:train:2301-2400batch: iter_time=1.050e-04, forward_time=0.109, loss_ctc=82.613, loss_att=58.490, acc=0.677, loss=65.727, backward_time=0.751, grad_norm=108.551, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.279e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 22:48:27,085 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpua003:0/64] 2023-07-06 22:48:46,472 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 22:48:50,356 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 22:48:50,356 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpua003:0/64] 2023-07-06 22:48:50,362 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 22:53:34,506 (trainer:732) INFO: 20epoch:train:2401-2500batch: iter_time=2.844, forward_time=0.130, loss_ctc=80.783, loss_att=60.279, acc=0.694, loss=66.430, backward_time=0.756, grad_norm=95.454, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.115, optim0_lr0=8.277e-05, train_time=8.151
+[gpua003:0/64] 2023-07-06 22:55:17,165 (trainer:732) INFO: 20epoch:train:2501-2600batch: iter_time=9.896e-05, forward_time=0.113, loss_ctc=70.675, loss_att=49.400, acc=0.713, loss=55.783, backward_time=0.761, grad_norm=88.563, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.275e-05, train_time=2.053
+[gpua003:0/64] 2023-07-06 22:56:57,735 (trainer:732) INFO: 20epoch:train:2601-2700batch: iter_time=9.556e-05, forward_time=0.108, loss_ctc=74.837, loss_att=54.096, acc=0.700, loss=60.318, backward_time=0.756, grad_norm=95.551, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.273e-05, train_time=2.011
+[gpua003:0/64] 2023-07-06 22:58:38,514 (trainer:732) INFO: 20epoch:train:2701-2800batch: iter_time=9.973e-05, forward_time=0.107, loss_ctc=75.135, loss_att=51.859, acc=0.679, loss=58.842, backward_time=0.752, grad_norm=88.078, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.270e-05, train_time=2.015
+[gpua003:0/64] 2023-07-06 23:00:20,949 (trainer:732) INFO: 20epoch:train:2801-2900batch: iter_time=9.541e-05, forward_time=0.108, loss_ctc=69.255, loss_att=55.518, acc=0.700, loss=59.639, backward_time=0.755, grad_norm=80.641, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.268e-05, train_time=2.048
+[gpua003:0/64] 2023-07-06 23:02:14,547 (trainer:732) INFO: 20epoch:train:2901-3000batch: iter_time=9.836e-05, forward_time=0.109, loss_ctc=74.405, loss_att=53.787, acc=0.692, loss=59.972, backward_time=0.782, grad_norm=90.228, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.266e-05, train_time=2.272
+[gpua003:0/64] 2023-07-06 23:03:54,862 (trainer:732) INFO: 20epoch:train:3001-3100batch: iter_time=9.682e-05, forward_time=0.109, loss_ctc=68.569, loss_att=51.720, acc=0.703, loss=56.774, backward_time=0.753, grad_norm=80.182, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.264e-05, train_time=2.006
+[gpua003:0/64] 2023-07-06 23:05:36,742 (trainer:732) INFO: 20epoch:train:3101-3200batch: iter_time=9.416e-05, forward_time=0.110, loss_ctc=81.585, loss_att=57.636, acc=0.695, loss=64.820, backward_time=0.756, grad_norm=114.022, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.261e-05, train_time=2.037
+[gpua003:0/64] 2023-07-06 23:07:16,980 (trainer:732) INFO: 20epoch:train:3201-3300batch: iter_time=9.267e-05, forward_time=0.109, loss_ctc=83.278, loss_att=62.301, acc=0.690, loss=68.594, backward_time=0.752, grad_norm=102.383, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.259e-05, train_time=2.005
+[gpua003:0/64] 2023-07-06 23:07:51,559 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpua003:0/64] 2023-07-06 23:08:11,129 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 23:08:14,689 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 23:08:14,689 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpua003:0/64] 2023-07-06 23:08:14,695 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 23:13:03,956 (trainer:732) INFO: 20epoch:train:3301-3400batch: iter_time=1.301, forward_time=0.109, loss_ctc=78.861, loss_att=56.676, acc=0.700, loss=63.331, backward_time=0.769, grad_norm=114.958, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.257e-05, train_time=6.939
+[gpua003:0/64] 2023-07-06 23:14:44,012 (trainer:732) INFO: 20epoch:train:3401-3500batch: iter_time=9.664e-05, forward_time=0.108, loss_ctc=68.025, loss_att=50.075, acc=0.699, loss=55.460, backward_time=0.753, grad_norm=84.489, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.255e-05, train_time=2.001
+[gpua003:0/64] 2023-07-06 23:16:27,830 (trainer:732) INFO: 20epoch:train:3501-3600batch: iter_time=9.904e-05, forward_time=0.108, loss_ctc=74.312, loss_att=52.930, acc=0.693, loss=59.345, backward_time=0.765, grad_norm=93.179, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.252e-05, train_time=2.076
+[gpua003:0/64] 2023-07-06 23:18:08,117 (trainer:732) INFO: 20epoch:train:3601-3700batch: iter_time=1.068e-04, forward_time=0.110, loss_ctc=73.489, loss_att=54.029, acc=0.682, loss=59.867, backward_time=0.753, grad_norm=96.418, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.250e-05, train_time=2.006
+[gpua003:0/64] 2023-07-06 23:19:47,962 (trainer:732) INFO: 20epoch:train:3701-3800batch: iter_time=9.700e-05, forward_time=0.107, loss_ctc=67.104, loss_att=51.126, acc=0.699, loss=55.919, backward_time=0.751, grad_norm=85.122, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.248e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 23:21:27,624 (trainer:732) INFO: 20epoch:train:3801-3900batch: iter_time=1.038e-04, forward_time=0.107, loss_ctc=73.938, loss_att=60.096, acc=0.686, loss=64.249, backward_time=0.751, grad_norm=89.373, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.112, optim0_lr0=8.246e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 23:23:07,236 (trainer:732) INFO: 20epoch:train:3901-4000batch: iter_time=1.016e-04, forward_time=0.108, loss_ctc=72.519, loss_att=51.367, acc=0.692, loss=57.713, backward_time=0.752, grad_norm=97.338, clip=100.000, loss_scale=2.306e+18, optim_step_time=0.113, optim0_lr0=8.243e-05, train_time=1.992
+[gpua003:0/64] 2023-07-06 23:24:46,969 (trainer:732) INFO: 20epoch:train:4001-4100batch: iter_time=9.761e-05, forward_time=0.108, loss_ctc=88.906, loss_att=60.730, acc=0.683, loss=69.183, backward_time=0.751, grad_norm=111.352, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.241e-05, train_time=1.994
+[gpua003:0/64] 2023-07-06 23:25:53,953 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpua003:0/64] 2023-07-06 23:26:13,125 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 23:26:16,688 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 23:26:16,688 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpua003:0/64] 2023-07-06 23:26:16,695 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 23:29:48,281 (trainer:732) INFO: 20epoch:train:4101-4200batch: iter_time=1.285, forward_time=0.108, loss_ctc=71.787, loss_att=56.750, acc=0.703, loss=61.261, backward_time=0.763, grad_norm=89.955, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.239e-05, train_time=6.026
+[gpua003:0/64] 2023-07-06 23:31:29,587 (trainer:732) INFO: 20epoch:train:4201-4300batch: iter_time=1.041e-04, forward_time=0.109, loss_ctc=70.768, loss_att=51.030, acc=0.713, loss=56.951, backward_time=0.758, grad_norm=86.739, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.237e-05, train_time=2.026
+[gpua003:0/64] 2023-07-06 23:33:09,537 (trainer:732) INFO: 20epoch:train:4301-4400batch: iter_time=1.101e-04, forward_time=0.108, loss_ctc=69.008, loss_att=48.198, acc=0.717, loss=54.441, backward_time=0.752, grad_norm=98.169, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.234e-05, train_time=1.999
+[gpua003:0/64] 2023-07-06 23:34:49,543 (trainer:732) INFO: 20epoch:train:4401-4500batch: iter_time=1.108e-04, forward_time=0.108, loss_ctc=73.392, loss_att=51.724, acc=0.689, loss=58.225, backward_time=0.753, grad_norm=99.811, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.232e-05, train_time=2.000
+[gpua003:0/64] 2023-07-06 23:36:29,633 (trainer:732) INFO: 20epoch:train:4501-4600batch: iter_time=1.189e-04, forward_time=0.109, loss_ctc=71.304, loss_att=56.277, acc=0.699, loss=60.785, backward_time=0.753, grad_norm=84.933, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.230e-05, train_time=2.002
+[gpua003:0/64] 2023-07-06 23:38:09,605 (trainer:732) INFO: 20epoch:train:4601-4700batch: iter_time=1.208e-04, forward_time=0.108, loss_ctc=70.498, loss_att=54.270, acc=0.697, loss=59.139, backward_time=0.752, grad_norm=83.394, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.228e-05, train_time=1.999
+[gpua003:0/64] 2023-07-06 23:39:49,275 (trainer:732) INFO: 20epoch:train:4701-4800batch: iter_time=1.190e-04, forward_time=0.107, loss_ctc=71.120, loss_att=52.173, acc=0.708, loss=57.857, backward_time=0.750, grad_norm=96.519, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.225e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 23:41:28,953 (trainer:732) INFO: 20epoch:train:4801-4900batch: iter_time=1.090e-04, forward_time=0.108, loss_ctc=81.570, loss_att=58.452, acc=0.689, loss=65.387, backward_time=0.750, grad_norm=95.091, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.223e-05, train_time=1.993
+[gpua003:0/64] 2023-07-06 23:43:09,201 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpua003:0/64] 2023-07-06 23:43:28,281 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-06 23:43:31,799 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-06 23:43:31,799 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpua003:0/64] 2023-07-06 23:43:31,806 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-06 23:46:22,528 (trainer:732) INFO: 20epoch:train:4901-5000batch: iter_time=1.282, forward_time=0.108, loss_ctc=79.944, loss_att=59.404, acc=0.708, loss=65.566, backward_time=0.755, grad_norm=106.026, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.221e-05, train_time=5.871
+[gpua003:0/64] 2023-07-06 23:48:04,124 (trainer:732) INFO: 20epoch:train:5001-5100batch: iter_time=1.074e-04, forward_time=0.107, loss_ctc=73.293, loss_att=52.693, acc=0.711, loss=58.873, backward_time=0.757, grad_norm=83.888, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.219e-05, train_time=2.032
+[gpua003:0/64] 2023-07-06 23:49:44,402 (trainer:732) INFO: 20epoch:train:5101-5200batch: iter_time=1.044e-04, forward_time=0.106, loss_ctc=67.920, loss_att=49.443, acc=0.700, loss=54.986, backward_time=0.751, grad_norm=92.376, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.112, optim0_lr0=8.217e-05, train_time=2.005
+[gpua003:0/64] 2023-07-06 23:51:24,233 (trainer:732) INFO: 20epoch:train:5201-5300batch: iter_time=1.049e-04, forward_time=0.106, loss_ctc=77.875, loss_att=55.129, acc=0.680, loss=61.953, backward_time=0.751, grad_norm=110.700, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.112, optim0_lr0=8.214e-05, train_time=1.996
+[gpua003:0/64] 2023-07-06 23:53:04,201 (trainer:732) INFO: 20epoch:train:5301-5400batch: iter_time=1.111e-04, forward_time=0.107, loss_ctc=66.903, loss_att=50.769, acc=0.700, loss=55.609, backward_time=0.751, grad_norm=80.652, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.112, optim0_lr0=8.212e-05, train_time=1.999
+[gpua003:0/64] 2023-07-06 23:54:50,887 (trainer:732) INFO: 20epoch:train:5401-5500batch: iter_time=1.171e-04, forward_time=0.107, loss_ctc=73.965, loss_att=54.060, acc=0.695, loss=60.031, backward_time=0.764, grad_norm=92.000, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.112, optim0_lr0=8.210e-05, train_time=2.133
+[gpua003:0/64] 2023-07-06 23:56:33,008 (trainer:732) INFO: 20epoch:train:5501-5600batch: iter_time=1.043e-04, forward_time=0.108, loss_ctc=72.161, loss_att=57.079, acc=0.703, loss=61.603, backward_time=0.754, grad_norm=88.705, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.208e-05, train_time=2.042
+[gpua003:0/64] 2023-07-06 23:58:12,856 (trainer:732) INFO: 20epoch:train:5601-5700batch: iter_time=1.017e-04, forward_time=0.109, loss_ctc=76.027, loss_att=53.040, acc=0.686, loss=59.936, backward_time=0.753, grad_norm=101.954, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.205e-05, train_time=1.997
+[gpua003:0/64] 2023-07-06 23:59:52,835 (trainer:732) INFO: 20epoch:train:5701-5800batch: iter_time=1.008e-04, forward_time=0.109, loss_ctc=83.943, loss_att=63.565, acc=0.695, loss=69.678, backward_time=0.753, grad_norm=152.716, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.114, optim0_lr0=8.203e-05, train_time=1.999
+[gpua003:0/64] 2023-07-07 00:00:26,092 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpua003:0/64] 2023-07-07 00:00:45,216 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 00:00:48,757 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 00:00:48,757 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpua003:0/64] 2023-07-07 00:00:48,764 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 00:05:13,212 (trainer:732) INFO: 20epoch:train:5801-5900batch: iter_time=1.287, forward_time=0.108, loss_ctc=69.493, loss_att=50.272, acc=0.713, loss=56.039, backward_time=0.767, grad_norm=90.186, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.201e-05, train_time=6.407
+[gpua003:0/64] 2023-07-07 00:06:53,937 (trainer:732) INFO: 20epoch:train:5901-6000batch: iter_time=9.985e-05, forward_time=0.107, loss_ctc=67.619, loss_att=48.654, acc=0.710, loss=54.344, backward_time=0.755, grad_norm=80.945, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.112, optim0_lr0=8.199e-05, train_time=2.014
+[gpua003:0/64] 2023-07-07 00:08:34,265 (trainer:732) INFO: 20epoch:train:6001-6100batch: iter_time=9.513e-05, forward_time=0.107, loss_ctc=72.368, loss_att=52.436, acc=0.699, loss=58.415, backward_time=0.754, grad_norm=99.544, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.197e-05, train_time=2.006
+[gpua003:0/64] 2023-07-07 00:10:14,094 (trainer:732) INFO: 20epoch:train:6101-6200batch: iter_time=1.063e-04, forward_time=0.107, loss_ctc=72.573, loss_att=53.094, acc=0.693, loss=58.938, backward_time=0.752, grad_norm=96.528, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.194e-05, train_time=1.996
+[gpua003:0/64] 2023-07-07 00:11:54,093 (trainer:732) INFO: 20epoch:train:6201-6300batch: iter_time=9.448e-05, forward_time=0.108, loss_ctc=67.484, loss_att=50.808, acc=0.704, loss=55.811, backward_time=0.753, grad_norm=108.855, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.192e-05, train_time=2.000
+[gpua003:0/64] 2023-07-07 00:13:34,349 (trainer:732) INFO: 20epoch:train:6301-6400batch: iter_time=1.004e-04, forward_time=0.107, loss_ctc=72.464, loss_att=56.785, acc=0.702, loss=61.489, backward_time=0.753, grad_norm=87.129, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.190e-05, train_time=2.005
+[gpua003:0/64] 2023-07-07 00:15:14,312 (trainer:732) INFO: 20epoch:train:6401-6500batch: iter_time=9.419e-05, forward_time=0.108, loss_ctc=71.280, loss_att=50.161, acc=0.709, loss=56.497, backward_time=0.754, grad_norm=98.013, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.188e-05, train_time=1.999
+[gpua003:0/64] 2023-07-07 00:16:54,147 (trainer:732) INFO: 20epoch:train:6501-6600batch: iter_time=1.010e-04, forward_time=0.107, loss_ctc=90.171, loss_att=63.851, acc=0.685, loss=71.747, backward_time=0.753, grad_norm=108.463, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.186e-05, train_time=1.996
+[gpua003:0/64] 2023-07-07 00:18:00,433 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpua003:0/64] 2023-07-07 00:18:19,726 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 00:18:23,276 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 00:18:23,276 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpua003:0/64] 2023-07-07 00:18:23,282 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 00:22:45,396 (trainer:732) INFO: 20epoch:train:6601-6700batch: iter_time=1.296, forward_time=0.108, loss_ctc=74.059, loss_att=56.550, acc=0.707, loss=61.803, backward_time=0.761, grad_norm=91.757, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.183e-05, train_time=7.025
+[gpua003:0/64] 2023-07-07 00:24:26,019 (trainer:732) INFO: 20epoch:train:6701-6800batch: iter_time=1.007e-04, forward_time=0.108, loss_ctc=69.347, loss_att=50.947, acc=0.699, loss=56.467, backward_time=0.755, grad_norm=89.501, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.181e-05, train_time=2.012
+[gpua003:0/64] 2023-07-07 00:26:06,137 (trainer:732) INFO: 20epoch:train:6801-6900batch: iter_time=1.227e-04, forward_time=0.107, loss_ctc=70.173, loss_att=49.974, acc=0.698, loss=56.034, backward_time=0.751, grad_norm=91.982, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.179e-05, train_time=2.002
+[gpua003:0/64] 2023-07-07 00:27:45,899 (trainer:732) INFO: 20epoch:train:6901-7000batch: iter_time=1.212e-04, forward_time=0.108, loss_ctc=71.799, loss_att=52.784, acc=0.684, loss=58.488, backward_time=0.752, grad_norm=105.648, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.177e-05, train_time=1.995
+[gpua003:0/64] 2023-07-07 00:29:25,719 (trainer:732) INFO: 20epoch:train:7001-7100batch: iter_time=1.125e-04, forward_time=0.107, loss_ctc=69.357, loss_att=53.933, acc=0.692, loss=58.560, backward_time=0.752, grad_norm=90.499, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.175e-05, train_time=1.996
+[gpua003:0/64] 2023-07-07 00:31:05,462 (trainer:732) INFO: 20epoch:train:7101-7200batch: iter_time=1.086e-04, forward_time=0.108, loss_ctc=68.020, loss_att=53.832, acc=0.699, loss=58.088, backward_time=0.752, grad_norm=108.903, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.173e-05, train_time=1.995
+[gpua003:0/64] 2023-07-07 00:32:45,164 (trainer:732) INFO: 20epoch:train:7201-7300batch: iter_time=1.177e-04, forward_time=0.108, loss_ctc=71.562, loss_att=52.820, acc=0.698, loss=58.443, backward_time=0.753, grad_norm=92.492, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.170e-05, train_time=1.994
+[gpua003:0/64] 2023-07-07 00:34:25,792 (trainer:732) INFO: 20epoch:train:7301-7400batch: iter_time=1.149e-04, forward_time=0.108, loss_ctc=84.374, loss_att=58.985, acc=0.686, loss=66.602, backward_time=0.752, grad_norm=103.222, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.112, optim0_lr0=8.168e-05, train_time=2.012
+[gpua003:0/64] 2023-07-07 00:36:05,793 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpua003:0/64] 2023-07-07 00:36:24,910 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 00:36:28,439 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 00:36:28,439 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpua003:0/64] 2023-07-07 00:36:28,446 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 00:39:39,875 (trainer:732) INFO: 20epoch:train:7401-7500batch: iter_time=1.300, forward_time=0.108, loss_ctc=82.541, loss_att=62.936, acc=0.691, loss=68.817, backward_time=0.758, grad_norm=115.515, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.112, optim0_lr0=8.166e-05, train_time=6.281
+[gpua003:0/64] 2023-07-07 00:41:23,429 (trainer:732) INFO: 20epoch:train:7501-7600batch: iter_time=9.637e-05, forward_time=0.109, loss_ctc=69.225, loss_att=48.998, acc=0.720, loss=55.066, backward_time=0.763, grad_norm=96.997, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.164e-05, train_time=2.071
+[gpua003:0/64] 2023-07-07 00:43:03,727 (trainer:732) INFO: 20epoch:train:7601-7700batch: iter_time=1.074e-04, forward_time=0.107, loss_ctc=74.248, loss_att=54.779, acc=0.704, loss=60.620, backward_time=0.752, grad_norm=92.841, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.112, optim0_lr0=8.162e-05, train_time=2.006
+[gpua003:0/64] 2023-07-07 00:44:44,287 (trainer:732) INFO: 20epoch:train:7701-7800batch: iter_time=1.079e-04, forward_time=0.107, loss_ctc=73.724, loss_att=51.407, acc=0.683, loss=58.102, backward_time=0.753, grad_norm=97.431, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.112, optim0_lr0=8.159e-05, train_time=2.011
+[gpua003:0/64] 2023-07-07 00:46:24,185 (trainer:732) INFO: 20epoch:train:7801-7900batch: iter_time=9.605e-05, forward_time=0.108, loss_ctc=69.983, loss_att=56.359, acc=0.700, loss=60.446, backward_time=0.753, grad_norm=78.300, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.157e-05, train_time=1.998
+[gpua003:0/64] 2023-07-07 00:48:04,263 (trainer:732) INFO: 20epoch:train:7901-8000batch: iter_time=9.780e-05, forward_time=0.109, loss_ctc=71.676, loss_att=53.364, acc=0.698, loss=58.857, backward_time=0.754, grad_norm=93.587, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.155e-05, train_time=2.001
+[gpua003:0/64] 2023-07-07 00:49:45,962 (trainer:732) INFO: 20epoch:train:8001-8100batch: iter_time=9.342e-05, forward_time=0.108, loss_ctc=67.152, loss_att=50.735, acc=0.707, loss=55.660, backward_time=0.754, grad_norm=96.944, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.153e-05, train_time=2.034
+[gpua003:0/64] 2023-07-07 00:51:26,537 (trainer:732) INFO: 20epoch:train:8101-8200batch: iter_time=9.864e-05, forward_time=0.108, loss_ctc=81.176, loss_att=58.498, acc=0.696, loss=65.302, backward_time=0.753, grad_norm=108.921, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.151e-05, train_time=2.011
+[gpua003:0/64] 2023-07-07 00:53:07,235 (trainer:732) INFO: 20epoch:train:8201-8300batch: iter_time=1.006e-04, forward_time=0.108, loss_ctc=79.556, loss_att=61.957, acc=0.691, loss=67.237, backward_time=0.752, grad_norm=109.467, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.149e-05, train_time=2.014
+[gpua003:0/64] 2023-07-07 00:53:41,518 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpua003:0/64] 2023-07-07 00:54:00,863 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 00:54:04,680 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 00:54:04,680 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpua003:0/64] 2023-07-07 00:54:04,686 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 00:58:25,770 (trainer:732) INFO: 20epoch:train:8301-8400batch: iter_time=1.273, forward_time=0.108, loss_ctc=78.310, loss_att=60.461, acc=0.698, loss=65.816, backward_time=0.767, grad_norm=117.559, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.146e-05, train_time=6.370
+[gpua003:0/64] 2023-07-07 01:00:07,312 (trainer:732) INFO: 20epoch:train:8401-8500batch: iter_time=1.152e-04, forward_time=0.108, loss_ctc=67.962, loss_att=49.896, acc=0.703, loss=55.316, backward_time=0.754, grad_norm=90.877, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.114, optim0_lr0=8.144e-05, train_time=2.031
+[gpua003:0/64] 2023-07-07 01:01:48,019 (trainer:732) INFO: 20epoch:train:8501-8600batch: iter_time=1.031e-04, forward_time=0.110, loss_ctc=68.584, loss_att=49.176, acc=0.698, loss=54.998, backward_time=0.755, grad_norm=93.992, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.142e-05, train_time=2.014
+[gpua003:0/64] 2023-07-07 01:03:36,281 (trainer:732) INFO: 20epoch:train:8601-8700batch: iter_time=9.241e-05, forward_time=0.108, loss_ctc=75.435, loss_att=53.797, acc=0.684, loss=60.288, backward_time=0.759, grad_norm=99.257, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.140e-05, train_time=2.165
+[gpua003:0/64] 2023-07-07 01:05:22,596 (trainer:732) INFO: 20epoch:train:8701-8800batch: iter_time=1.007e-04, forward_time=0.108, loss_ctc=69.540, loss_att=53.032, acc=0.705, loss=57.984, backward_time=0.759, grad_norm=88.332, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.138e-05, train_time=2.126
+[gpua003:0/64] 2023-07-07 01:07:02,503 (trainer:732) INFO: 20epoch:train:8801-8900batch: iter_time=1.033e-04, forward_time=0.108, loss_ctc=71.565, loss_att=57.189, acc=0.691, loss=61.502, backward_time=0.751, grad_norm=90.049, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.136e-05, train_time=1.998
+[gpua003:0/64] 2023-07-07 01:08:47,374 (trainer:732) INFO: 20epoch:train:8901-9000batch: iter_time=9.529e-05, forward_time=0.108, loss_ctc=74.747, loss_att=50.418, acc=0.695, loss=57.717, backward_time=0.757, grad_norm=98.679, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.112, optim0_lr0=8.134e-05, train_time=2.097
+[gpua003:0/64] 2023-07-07 01:10:27,736 (trainer:732) INFO: 20epoch:train:9001-9100batch: iter_time=9.856e-05, forward_time=0.107, loss_ctc=83.548, loss_att=63.248, acc=0.685, loss=69.338, backward_time=0.751, grad_norm=111.462, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.112, optim0_lr0=8.131e-05, train_time=2.007
+[gpua003:0/64] 2023-07-07 01:11:37,029 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpua003:0/64] 2023-07-07 01:11:56,346 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 01:12:00,135 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 01:12:00,135 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpua003:0/64] 2023-07-07 01:12:00,141 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 01:16:34,776 (trainer:732) INFO: 20epoch:train:9101-9200batch: iter_time=1.308, forward_time=0.108, loss_ctc=72.185, loss_att=56.405, acc=0.695, loss=61.139, backward_time=0.773, grad_norm=115.543, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.112, optim0_lr0=8.129e-05, train_time=7.341
+[gpua003:0/64] 2023-07-07 01:18:15,925 (trainer:732) INFO: 20epoch:train:9201-9300batch: iter_time=9.792e-05, forward_time=0.106, loss_ctc=71.745, loss_att=52.813, acc=0.704, loss=58.492, backward_time=0.754, grad_norm=89.778, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.112, optim0_lr0=8.127e-05, train_time=2.023
+[gpua003:0/64] 2023-07-07 01:19:59,197 (trainer:732) INFO: 20epoch:train:9301-9400batch: iter_time=9.737e-05, forward_time=0.107, loss_ctc=69.353, loss_att=48.395, acc=0.713, loss=54.682, backward_time=0.753, grad_norm=88.881, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.112, optim0_lr0=8.125e-05, train_time=2.065
+[gpua003:0/64] 2023-07-07 01:21:40,857 (trainer:732) INFO: 20epoch:train:9401-9500batch: iter_time=1.001e-04, forward_time=0.107, loss_ctc=72.030, loss_att=51.469, acc=0.684, loss=57.637, backward_time=0.753, grad_norm=97.051, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.112, optim0_lr0=8.123e-05, train_time=2.033
+[gpua003:0/64] 2023-07-07 01:23:21,684 (trainer:732) INFO: 20epoch:train:9501-9600batch: iter_time=9.299e-05, forward_time=0.107, loss_ctc=71.994, loss_att=56.267, acc=0.690, loss=60.985, backward_time=0.753, grad_norm=91.397, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.112, optim0_lr0=8.121e-05, train_time=2.016
+[gpua003:0/64] 2023-07-07 01:25:01,632 (trainer:732) INFO: 20epoch:train:9601-9700batch: iter_time=1.032e-04, forward_time=0.108, loss_ctc=70.203, loss_att=53.311, acc=0.696, loss=58.379, backward_time=0.753, grad_norm=104.136, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.112, optim0_lr0=8.118e-05, train_time=1.999
+[gpua003:0/64] 2023-07-07 01:26:41,518 (trainer:732) INFO: 20epoch:train:9701-9800batch: iter_time=8.494e-05, forward_time=0.107, loss_ctc=70.139, loss_att=51.677, acc=0.703, loss=57.216, backward_time=0.753, grad_norm=87.042, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.116e-05, train_time=1.998
+[gpua003:0/64] 2023-07-07 01:28:24,627 (trainer:732) INFO: 20epoch:train:9801-9900batch: iter_time=9.340e-05, forward_time=0.107, loss_ctc=82.680, loss_att=58.302, acc=0.679, loss=65.616, backward_time=0.763, grad_norm=95.875, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.114e-05, train_time=2.062
+[gpua003:0/64] 2023-07-07 01:30:09,616 (trainer:732) INFO: 20epoch:train:9901-10000batch: iter_time=9.389e-05, forward_time=0.107, loss_ctc=77.669, loss_att=57.909, acc=0.701, loss=63.837, backward_time=0.757, grad_norm=107.681, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.112e-05, train_time=2.100
+[gpua003:0/64] 2023-07-07 01:42:21,427 (trainer:338) INFO: 20epoch results: [train] iter_time=0.196, forward_time=0.108, loss_ctc=73.961, loss_att=54.526, acc=0.696, loss=60.357, backward_time=0.755, grad_norm=96.055, clip=100.000, loss_scale=4.612e+18, optim_step_time=0.113, optim0_lr0=8.221e-05, train_time=2.596, time=3 hours, 36 minutes and 32.76 seconds, total_count=170000, gpu_max_cached_mem_GB=37.779, [valid] loss_ctc=49.387, cer_ctc=0.283, loss_att=42.558, acc=0.643, cer=0.424, wer=0.999, loss=44.607, time=5 minutes and 50.92 seconds, total_count=17710, gpu_max_cached_mem_GB=37.779, [att_plot] time=6 minutes and 11.77 seconds, total_count=0, gpu_max_cached_mem_GB=37.779
+[gpua003:0/64] 2023-07-07 01:42:37,043 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpua003:0/64] 2023-07-07 01:42:37,073 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.acc": exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.acc.ave_5best.till20epoch.pth
+[gpua003:0/64] 2023-07-07 01:43:32,066 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.total_count": exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.total_count.ave_5best.till20epoch.pth
+[gpua003:0/64] 2023-07-07 01:43:57,407 (trainer:272) INFO: 21/100epoch started. Estimated time to finish: 1 week, 5 days and 21 hours
+[gpua003:0/64] 2023-07-07 01:43:58,913 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpua003:0/64] 2023-07-07 01:44:18,582 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 01:44:24,346 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 01:44:24,346 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpua003:0/64] 2023-07-07 01:44:24,417 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 01:50:53,031 (trainer:732) INFO: 21epoch:train:1-100batch: iter_time=3.069, forward_time=0.137, loss_ctc=75.425, loss_att=60.964, acc=0.696, loss=65.302, backward_time=0.770, grad_norm=105.263, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.117, optim0_lr0=8.110e-05, train_time=8.297
+[gpua003:0/64] 2023-07-07 01:52:34,428 (trainer:732) INFO: 21epoch:train:101-200batch: iter_time=1.033e-04, forward_time=0.109, loss_ctc=66.860, loss_att=53.856, acc=0.695, loss=57.757, backward_time=0.754, grad_norm=87.309, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.108e-05, train_time=2.028
+[gpua003:0/64] 2023-07-07 01:54:16,169 (trainer:732) INFO: 21epoch:train:201-300batch: iter_time=1.017e-04, forward_time=0.108, loss_ctc=68.585, loss_att=49.706, acc=0.718, loss=55.369, backward_time=0.751, grad_norm=89.606, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.106e-05, train_time=2.035
+[gpua003:0/64] 2023-07-07 01:55:56,901 (trainer:732) INFO: 21epoch:train:301-400batch: iter_time=9.549e-05, forward_time=0.109, loss_ctc=83.868, loss_att=59.563, acc=0.677, loss=66.854, backward_time=0.751, grad_norm=118.007, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.104e-05, train_time=2.014
+[gpua003:0/64] 2023-07-07 01:57:38,518 (trainer:732) INFO: 21epoch:train:401-500batch: iter_time=8.947e-05, forward_time=0.108, loss_ctc=75.494, loss_att=55.952, acc=0.710, loss=61.815, backward_time=0.753, grad_norm=96.012, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.101e-05, train_time=2.032
+[gpua003:0/64] 2023-07-07 01:59:25,089 (trainer:732) INFO: 21epoch:train:501-600batch: iter_time=8.360e-05, forward_time=0.107, loss_ctc=77.597, loss_att=58.124, acc=0.678, loss=63.966, backward_time=0.760, grad_norm=95.102, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.099e-05, train_time=2.131
+[gpua003:0/64] 2023-07-07 02:01:14,358 (trainer:732) INFO: 21epoch:train:601-700batch: iter_time=8.784e-05, forward_time=0.108, loss_ctc=79.901, loss_att=58.953, acc=0.688, loss=65.237, backward_time=0.766, grad_norm=109.677, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.097e-05, train_time=2.185
+[gpua003:0/64] 2023-07-07 02:03:06,286 (trainer:732) INFO: 21epoch:train:701-800batch: iter_time=8.960e-05, forward_time=0.108, loss_ctc=76.140, loss_att=52.512, acc=0.702, loss=59.600, backward_time=0.764, grad_norm=108.446, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.095e-05, train_time=2.238
+[gpua003:0/64] 2023-07-07 02:03:45,470 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpua003:0/64] 2023-07-07 02:04:04,788 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 02:04:08,666 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 02:04:08,666 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpua003:0/64] 2023-07-07 02:04:08,672 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 02:08:05,629 (trainer:732) INFO: 21epoch:train:801-900batch: iter_time=1.390, forward_time=0.152, loss_ctc=78.626, loss_att=63.039, acc=0.675, loss=67.715, backward_time=0.773, grad_norm=107.084, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.115, optim0_lr0=8.093e-05, train_time=5.987
+[gpua003:0/64] 2023-07-07 02:09:45,991 (trainer:732) INFO: 21epoch:train:901-1000batch: iter_time=9.686e-05, forward_time=0.109, loss_ctc=65.698, loss_att=52.821, acc=0.694, loss=56.684, backward_time=0.754, grad_norm=87.102, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.091e-05, train_time=2.007
+[gpua003:0/64] 2023-07-07 02:11:25,991 (trainer:732) INFO: 21epoch:train:1001-1100batch: iter_time=1.035e-04, forward_time=0.108, loss_ctc=70.095, loss_att=51.641, acc=0.713, loss=57.177, backward_time=0.751, grad_norm=85.639, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.089e-05, train_time=2.000
+[gpua003:0/64] 2023-07-07 02:13:05,844 (trainer:732) INFO: 21epoch:train:1101-1200batch: iter_time=1.030e-04, forward_time=0.108, loss_ctc=71.874, loss_att=52.739, acc=0.696, loss=58.480, backward_time=0.752, grad_norm=119.352, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.087e-05, train_time=1.997
+[gpua003:0/64] 2023-07-07 02:14:45,543 (trainer:732) INFO: 21epoch:train:1201-1300batch: iter_time=9.500e-05, forward_time=0.108, loss_ctc=79.532, loss_att=61.570, acc=0.679, loss=66.959, backward_time=0.750, grad_norm=83.751, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.114, optim0_lr0=8.084e-05, train_time=1.994
+[gpua003:0/64] 2023-07-07 02:16:25,354 (trainer:732) INFO: 21epoch:train:1301-1400batch: iter_time=9.425e-05, forward_time=0.108, loss_ctc=70.489, loss_att=51.580, acc=0.691, loss=57.252, backward_time=0.751, grad_norm=89.038, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.082e-05, train_time=1.996
+[gpua003:0/64] 2023-07-07 02:18:05,983 (trainer:732) INFO: 21epoch:train:1401-1500batch: iter_time=9.711e-05, forward_time=0.109, loss_ctc=79.242, loss_att=59.276, acc=0.681, loss=65.266, backward_time=0.754, grad_norm=99.915, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.080e-05, train_time=2.012
+[gpua003:0/64] 2023-07-07 02:19:45,694 (trainer:732) INFO: 21epoch:train:1501-1600batch: iter_time=1.032e-04, forward_time=0.107, loss_ctc=86.448, loss_att=59.634, acc=0.687, loss=67.678, backward_time=0.752, grad_norm=119.579, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.113, optim0_lr0=8.078e-05, train_time=1.994
+[gpua003:0/64] 2023-07-07 02:21:03,476 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpua003:0/64] 2023-07-07 02:21:22,929 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 02:21:26,897 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 02:21:26,897 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpua003:0/64] 2023-07-07 02:21:26,904 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 02:26:35,736 (trainer:732) INFO: 21epoch:train:1601-1700batch: iter_time=3.035, forward_time=0.137, loss_ctc=74.922, loss_att=57.634, acc=0.681, loss=62.820, backward_time=0.764, grad_norm=103.929, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.115, optim0_lr0=8.076e-05, train_time=8.200
+[gpua003:0/64] 2023-07-07 02:28:16,442 (trainer:732) INFO: 21epoch:train:1701-1800batch: iter_time=9.394e-05, forward_time=0.110, loss_ctc=67.692, loss_att=51.540, acc=0.720, loss=56.386, backward_time=0.754, grad_norm=84.127, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.112, optim0_lr0=8.074e-05, train_time=2.015
+[gpua003:0/64] 2023-07-07 02:29:56,377 (trainer:732) INFO: 21epoch:train:1801-1900batch: iter_time=9.295e-05, forward_time=0.109, loss_ctc=67.942, loss_att=54.566, acc=0.708, loss=58.578, backward_time=0.753, grad_norm=95.960, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.112, optim0_lr0=8.072e-05, train_time=1.998
+[gpua003:0/64] 2023-07-07 02:31:36,002 (trainer:732) INFO: 21epoch:train:1901-2000batch: iter_time=8.542e-05, forward_time=0.108, loss_ctc=62.560, loss_att=45.274, acc=0.709, loss=50.460, backward_time=0.750, grad_norm=78.689, clip=100.000, loss_scale=9.223e+18, optim_step_time=0.112, optim0_lr0=8.070e-05, train_time=1.992
+[gpua003:0/64] 2023-07-07 02:33:15,755 (trainer:732) INFO: 21epoch:train:2001-2100batch: iter_time=8.779e-05, forward_time=0.108, loss_ctc=83.888, loss_att=62.880, acc=0.695, loss=69.182, backward_time=0.752, grad_norm=111.465, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.068e-05, train_time=1.995
+[gpua003:0/64] 2023-07-07 02:34:55,601 (trainer:732) INFO: 21epoch:train:2101-2200batch: iter_time=9.295e-05, forward_time=0.108, loss_ctc=71.779, loss_att=53.269, acc=0.708, loss=58.822, backward_time=0.753, grad_norm=78.300, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.066e-05, train_time=1.997
+[gpua003:0/64] 2023-07-07 02:36:35,519 (trainer:732) INFO: 21epoch:train:2201-2300batch: iter_time=8.719e-05, forward_time=0.109, loss_ctc=77.907, loss_att=57.920, acc=0.681, loss=63.916, backward_time=0.753, grad_norm=94.141, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.063e-05, train_time=1.998
+[gpua003:0/64] 2023-07-07 02:38:15,578 (trainer:732) INFO: 21epoch:train:2301-2400batch: iter_time=1.077e-04, forward_time=0.109, loss_ctc=77.449, loss_att=56.410, acc=0.699, loss=62.722, backward_time=0.751, grad_norm=114.357, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.061e-05, train_time=2.001
+[gpua003:0/64] 2023-07-07 02:40:15,411 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpua003:0/64] 2023-07-07 02:40:34,895 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 02:40:38,734 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 02:40:38,734 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpua003:0/64] 2023-07-07 02:40:38,741 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 02:43:57,355 (trainer:732) INFO: 21epoch:train:2401-2500batch: iter_time=1.755, forward_time=0.116, loss_ctc=73.960, loss_att=51.538, acc=0.693, loss=58.265, backward_time=0.768, grad_norm=101.777, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.059e-05, train_time=6.835
+[gpua003:0/64] 2023-07-07 02:45:38,947 (trainer:732) INFO: 21epoch:train:2501-2600batch: iter_time=1.017e-04, forward_time=0.107, loss_ctc=73.796, loss_att=60.954, acc=0.696, loss=64.807, backward_time=0.760, grad_norm=102.406, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.057e-05, train_time=2.032
+[gpua003:0/64] 2023-07-07 02:47:21,878 (trainer:732) INFO: 21epoch:train:2601-2700batch: iter_time=1.022e-04, forward_time=0.107, loss_ctc=65.924, loss_att=51.785, acc=0.701, loss=56.026, backward_time=0.752, grad_norm=94.190, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.055e-05, train_time=2.058
+[gpua003:0/64] 2023-07-07 02:49:01,878 (trainer:732) INFO: 21epoch:train:2701-2800batch: iter_time=1.006e-04, forward_time=0.107, loss_ctc=68.630, loss_att=49.530, acc=0.716, loss=55.260, backward_time=0.752, grad_norm=96.790, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.053e-05, train_time=2.000
+[gpua003:0/64] 2023-07-07 02:50:43,603 (trainer:732) INFO: 21epoch:train:2801-2900batch: iter_time=1.010e-04, forward_time=0.109, loss_ctc=78.316, loss_att=57.443, acc=0.680, loss=63.705, backward_time=0.757, grad_norm=110.859, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.051e-05, train_time=2.034
+[gpua003:0/64] 2023-07-07 02:52:23,596 (trainer:732) INFO: 21epoch:train:2901-3000batch: iter_time=9.699e-05, forward_time=0.110, loss_ctc=74.531, loss_att=55.316, acc=0.713, loss=61.081, backward_time=0.754, grad_norm=91.694, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.113, optim0_lr0=8.049e-05, train_time=2.000
+[gpua003:0/64] 2023-07-07 02:54:03,277 (trainer:732) INFO: 21epoch:train:3001-3100batch: iter_time=1.094e-04, forward_time=0.108, loss_ctc=72.826, loss_att=52.947, acc=0.686, loss=58.911, backward_time=0.751, grad_norm=96.991, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.047e-05, train_time=1.993
+[gpua003:0/64] 2023-07-07 02:55:45,673 (trainer:732) INFO: 21epoch:train:3101-3200batch: iter_time=1.044e-04, forward_time=0.128, loss_ctc=79.741, loss_att=59.681, acc=0.688, loss=65.699, backward_time=0.753, grad_norm=105.684, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=8.045e-05, train_time=2.048
+[gpua003:0/64] 2023-07-07 02:57:26,045 (trainer:732) INFO: 21epoch:train:3201-3300batch: iter_time=4.703e-04, forward_time=0.110, loss_ctc=72.892, loss_att=50.604, acc=0.704, loss=57.290, backward_time=0.750, grad_norm=98.215, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.113, optim0_lr0=8.043e-05, train_time=2.002
+[gpua003:0/64] 2023-07-07 02:57:59,564 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpua003:0/64] 2023-07-07 02:58:19,306 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 02:58:23,210 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 02:58:23,210 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpua003:0/64] 2023-07-07 02:58:23,216 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 03:04:26,311 (trainer:732) INFO: 21epoch:train:3301-3400batch: iter_time=1.411, forward_time=0.113, loss_ctc=70.531, loss_att=53.598, acc=0.698, loss=58.678, backward_time=0.765, grad_norm=95.913, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=8.040e-05, train_time=8.405
+[gpua003:0/64] 2023-07-07 03:06:06,849 (trainer:732) INFO: 21epoch:train:3401-3500batch: iter_time=1.204e-04, forward_time=0.109, loss_ctc=66.577, loss_att=55.002, acc=0.685, loss=58.474, backward_time=0.753, grad_norm=97.139, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.038e-05, train_time=2.011
+[gpua003:0/64] 2023-07-07 03:07:46,882 (trainer:732) INFO: 21epoch:train:3501-3600batch: iter_time=1.055e-04, forward_time=0.108, loss_ctc=68.566, loss_att=48.282, acc=0.717, loss=54.367, backward_time=0.754, grad_norm=84.774, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.036e-05, train_time=2.000
+[gpua003:0/64] 2023-07-07 03:09:26,695 (trainer:732) INFO: 21epoch:train:3601-3700batch: iter_time=1.129e-04, forward_time=0.109, loss_ctc=76.925, loss_att=57.566, acc=0.683, loss=63.374, backward_time=0.753, grad_norm=117.953, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.034e-05, train_time=1.996
+[gpua003:0/64] 2023-07-07 03:11:06,310 (trainer:732) INFO: 21epoch:train:3701-3800batch: iter_time=1.180e-04, forward_time=0.107, loss_ctc=70.698, loss_att=53.316, acc=0.704, loss=58.530, backward_time=0.751, grad_norm=94.370, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.032e-05, train_time=1.992
+[gpua003:0/64] 2023-07-07 03:12:46,196 (trainer:732) INFO: 21epoch:train:3801-3900batch: iter_time=1.218e-04, forward_time=0.109, loss_ctc=74.676, loss_att=54.150, acc=0.689, loss=60.307, backward_time=0.752, grad_norm=84.395, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.030e-05, train_time=1.997
+[gpua003:0/64] 2023-07-07 03:14:25,952 (trainer:732) INFO: 21epoch:train:3901-4000batch: iter_time=9.952e-05, forward_time=0.107, loss_ctc=79.508, loss_att=60.374, acc=0.682, loss=66.114, backward_time=0.752, grad_norm=109.277, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.028e-05, train_time=1.995
+[gpua003:0/64] 2023-07-07 03:16:05,888 (trainer:732) INFO: 21epoch:train:4001-4100batch: iter_time=8.976e-05, forward_time=0.108, loss_ctc=76.068, loss_att=53.950, acc=0.697, loss=60.586, backward_time=0.754, grad_norm=109.443, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.112, optim0_lr0=8.026e-05, train_time=1.999
+[gpua003:0/64] 2023-07-07 03:17:13,032 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpua003:0/64] 2023-07-07 03:17:32,119 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 03:17:35,672 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 03:17:35,672 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpua003:0/64] 2023-07-07 03:17:35,678 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 03:21:24,869 (trainer:732) INFO: 21epoch:train:4101-4200batch: iter_time=2.108, forward_time=0.163, loss_ctc=73.138, loss_att=59.185, acc=0.677, loss=63.371, backward_time=0.766, grad_norm=100.088, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.115, optim0_lr0=8.024e-05, train_time=6.378
+[gpua003:0/64] 2023-07-07 03:23:05,523 (trainer:732) INFO: 21epoch:train:4201-4300batch: iter_time=9.623e-05, forward_time=0.109, loss_ctc=67.172, loss_att=52.075, acc=0.720, loss=56.604, backward_time=0.755, grad_norm=80.071, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=8.022e-05, train_time=2.014
+[gpua003:0/64] 2023-07-07 03:24:45,805 (trainer:732) INFO: 21epoch:train:4301-4400batch: iter_time=1.166e-04, forward_time=0.111, loss_ctc=66.732, loss_att=52.975, acc=0.711, loss=57.102, backward_time=0.754, grad_norm=109.947, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=8.020e-05, train_time=2.005
+[gpua003:0/64] 2023-07-07 03:26:25,913 (trainer:732) INFO: 21epoch:train:4401-4500batch: iter_time=1.080e-04, forward_time=0.110, loss_ctc=63.186, loss_att=44.532, acc=0.715, loss=50.128, backward_time=0.755, grad_norm=80.740, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=8.018e-05, train_time=2.002
+[gpua003:0/64] 2023-07-07 03:28:05,766 (trainer:732) INFO: 21epoch:train:4501-4600batch: iter_time=1.226e-04, forward_time=0.110, loss_ctc=82.770, loss_att=62.550, acc=0.696, loss=68.616, backward_time=0.753, grad_norm=110.536, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.113, optim0_lr0=8.016e-05, train_time=1.997
+[gpua003:0/64] 2023-07-07 03:29:48,662 (trainer:732) INFO: 21epoch:train:4601-4700batch: iter_time=1.171e-04, forward_time=0.111, loss_ctc=71.738, loss_att=52.977, acc=0.712, loss=58.605, backward_time=0.757, grad_norm=88.053, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.113, optim0_lr0=8.014e-05, train_time=2.058
+[gpua003:0/64] 2023-07-07 03:31:28,763 (trainer:732) INFO: 21epoch:train:4701-4800batch: iter_time=1.118e-04, forward_time=0.110, loss_ctc=75.673, loss_att=54.196, acc=0.695, loss=60.639, backward_time=0.754, grad_norm=90.929, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.113, optim0_lr0=8.011e-05, train_time=2.002
+[gpua003:0/64] 2023-07-07 03:33:08,783 (trainer:732) INFO: 21epoch:train:4801-4900batch: iter_time=1.194e-04, forward_time=0.110, loss_ctc=76.080, loss_att=56.995, acc=0.697, loss=62.720, backward_time=0.755, grad_norm=119.887, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=8.009e-05, train_time=2.000
+[gpua003:0/64] 2023-07-07 03:34:49,915 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpua003:0/64] 2023-07-07 03:35:09,166 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 03:35:12,753 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 03:35:12,753 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpua003:0/64] 2023-07-07 03:35:12,760 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 03:39:22,173 (trainer:732) INFO: 21epoch:train:4901-5000batch: iter_time=1.327, forward_time=0.110, loss_ctc=72.216, loss_att=51.391, acc=0.693, loss=57.639, backward_time=0.762, grad_norm=97.793, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=8.007e-05, train_time=7.468
+[gpua003:0/64] 2023-07-07 03:41:06,488 (trainer:732) INFO: 21epoch:train:5001-5100batch: iter_time=1.047e-04, forward_time=0.109, loss_ctc=70.415, loss_att=54.270, acc=0.710, loss=59.113, backward_time=0.761, grad_norm=92.209, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=8.005e-05, train_time=2.086
+[gpua003:0/64] 2023-07-07 03:42:46,655 (trainer:732) INFO: 21epoch:train:5101-5200batch: iter_time=9.585e-05, forward_time=0.109, loss_ctc=64.440, loss_att=51.288, acc=0.697, loss=55.233, backward_time=0.754, grad_norm=91.829, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=8.003e-05, train_time=2.003
+[gpua003:0/64] 2023-07-07 03:44:26,435 (trainer:732) INFO: 21epoch:train:5201-5300batch: iter_time=1.122e-04, forward_time=0.108, loss_ctc=66.016, loss_att=47.983, acc=0.711, loss=53.393, backward_time=0.753, grad_norm=82.415, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=8.001e-05, train_time=1.995
+[gpua003:0/64] 2023-07-07 03:46:06,273 (trainer:732) INFO: 21epoch:train:5301-5400batch: iter_time=1.066e-04, forward_time=0.109, loss_ctc=79.568, loss_att=58.274, acc=0.689, loss=64.662, backward_time=0.752, grad_norm=101.250, clip=100.000,
loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=7.999e-05, train_time=1.997 +[gpua003:0/64] 2023-07-07 03:47:45,977 (trainer:732) INFO: 21epoch:train:5401-5500batch: iter_time=9.211e-05, forward_time=0.108, loss_ctc=77.093, loss_att=57.211, acc=0.708, loss=63.176, backward_time=0.752, grad_norm=88.698, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=7.997e-05, train_time=1.994 +[gpua003:0/64] 2023-07-07 03:49:25,739 (trainer:732) INFO: 21epoch:train:5501-5600batch: iter_time=1.027e-04, forward_time=0.108, loss_ctc=70.776, loss_att=51.574, acc=0.689, loss=57.334, backward_time=0.752, grad_norm=92.327, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.113, optim0_lr0=7.995e-05, train_time=1.995 +[gpua003:0/64] 2023-07-07 03:51:05,561 (trainer:732) INFO: 21epoch:train:5601-5700batch: iter_time=9.823e-05, forward_time=0.109, loss_ctc=78.570, loss_att=59.212, acc=0.684, loss=65.020, backward_time=0.752, grad_norm=102.663, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=7.993e-05, train_time=1.996 +[gpua003:0/64] 2023-07-07 03:52:45,407 (trainer:732) INFO: 21epoch:train:5701-5800batch: iter_time=1.062e-04, forward_time=0.108, loss_ctc=74.565, loss_att=51.158, acc=0.691, loss=58.180, backward_time=0.753, grad_norm=101.119, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.114, optim0_lr0=7.991e-05, train_time=1.997 +[gpua003:0/64] 2023-07-07 03:53:20,039 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpua003:0/64] 2023-07-07 03:53:39,181 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 03:53:42,818 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 03:53:42,818 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpua003:0/64] 2023-07-07 03:53:42,824 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 03:58:59,477 (trainer:732) INFO: 21epoch:train:5801-5900batch: iter_time=1.377, forward_time=0.109, loss_ctc=68.222, loss_att=54.113, acc=0.702, loss=58.346, backward_time=0.768, grad_norm=85.484, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.113, optim0_lr0=7.989e-05, train_time=7.481 +[gpua003:0/64] 2023-07-07 04:00:40,031 (trainer:732) INFO: 21epoch:train:5901-6000batch: iter_time=9.701e-05, forward_time=0.108, loss_ctc=67.129, loss_att=54.573, acc=0.697, loss=58.340, backward_time=0.753, grad_norm=88.099, clip=100.000, loss_scale=1.845e+19, optim_step_time=0.113, optim0_lr0=7.987e-05, train_time=2.011 +[gpua003:0/64] 2023-07-07 04:02:20,207 (trainer:732) INFO: 21epoch:train:6001-6100batch: iter_time=9.362e-05, forward_time=0.109, loss_ctc=68.337, loss_att=48.416, acc=0.728, loss=54.392, backward_time=0.752, grad_norm=86.732, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.985e-05, train_time=2.003 +[gpua003:0/64] 2023-07-07 04:04:01,464 (trainer:732) INFO: 21epoch:train:6101-6200batch: iter_time=9.073e-05, 
forward_time=0.109, loss_ctc=75.111, loss_att=55.799, acc=0.695, loss=61.593, backward_time=0.761, grad_norm=94.382, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.983e-05, train_time=2.025 +[gpua003:0/64] 2023-07-07 04:05:41,365 (trainer:732) INFO: 21epoch:train:6201-6300batch: iter_time=9.518e-05, forward_time=0.109, loss_ctc=69.408, loss_att=51.348, acc=0.719, loss=56.766, backward_time=0.752, grad_norm=91.702, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.981e-05, train_time=1.998 +[gpua003:0/64] 2023-07-07 04:07:21,149 (trainer:732) INFO: 21epoch:train:6301-6400batch: iter_time=9.310e-05, forward_time=0.109, loss_ctc=72.354, loss_att=53.860, acc=0.695, loss=59.408, backward_time=0.752, grad_norm=82.091, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.979e-05, train_time=1.995 +[gpua003:0/64] 2023-07-07 04:09:01,102 (trainer:732) INFO: 21epoch:train:6401-6500batch: iter_time=9.926e-05, forward_time=0.109, loss_ctc=78.032, loss_att=54.051, acc=0.697, loss=61.245, backward_time=0.753, grad_norm=110.132, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.977e-05, train_time=1.999 +[gpua003:0/64] 2023-07-07 04:10:40,790 (trainer:732) INFO: 21epoch:train:6501-6600batch: iter_time=9.807e-05, forward_time=0.107, loss_ctc=78.055, loss_att=56.847, acc=0.705, loss=63.209, backward_time=0.752, grad_norm=115.245, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.975e-05, train_time=1.994 +[gpua003:0/64] 2023-07-07 04:11:49,314 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpua003:0/64] 2023-07-07 04:12:08,429 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpua003:0/64] 2023-07-07 04:12:11,990 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpua003:0/64] 2023-07-07 04:12:11,990 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpua003:0/64] 2023-07-07 04:12:11,997 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpua003:0/64] 2023-07-07 04:15:49,294 (trainer:732) INFO: 21epoch:train:6601-6700batch: iter_time=1.324, forward_time=0.109, loss_ctc=74.801, loss_att=60.444, acc=0.679, loss=64.751, backward_time=0.766, grad_norm=105.116, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.973e-05, train_time=6.170 +[gpua003:0/64] 2023-07-07 04:17:31,076 (trainer:732) INFO: 21epoch:train:6701-6800batch: iter_time=9.790e-05, forward_time=0.111, loss_ctc=67.289, loss_att=50.540, acc=0.720, loss=55.565, backward_time=0.757, grad_norm=81.960, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.971e-05, train_time=2.035 +[gpua003:0/64] 2023-07-07 04:19:10,966 (trainer:732) INFO: 21epoch:train:6801-6900batch: iter_time=1.034e-04, forward_time=0.108, loss_ctc=67.753, loss_att=53.586, acc=0.710, loss=57.836, backward_time=0.751, grad_norm=85.772, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.969e-05, 
train_time=1.998 +[gpua003:0/64] 2023-07-07 04:20:51,192 (trainer:732) INFO: 21epoch:train:6901-7000batch: iter_time=1.021e-04, forward_time=0.108, loss_ctc=62.269, loss_att=43.393, acc=0.718, loss=49.056, backward_time=0.752, grad_norm=94.628, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.112, optim0_lr0=7.967e-05, train_time=2.004 +[gpua003:0/64] 2023-07-07 04:22:31,278 (trainer:732) INFO: 21epoch:train:7001-7100batch: iter_time=1.125e-04, forward_time=0.109, loss_ctc=82.553, loss_att=62.057, acc=0.701, loss=68.206, backward_time=0.752, grad_norm=96.053, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.965e-05, train_time=2.002 +[gpua003:0/64] 2023-07-07 04:24:11,011 (trainer:732) INFO: 21epoch:train:7101-7200batch: iter_time=9.769e-05, forward_time=0.109, loss_ctc=73.617, loss_att=53.857, acc=0.710, loss=59.785, backward_time=0.752, grad_norm=106.016, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.963e-05, train_time=1.994 +[gpua003:0/64] 2023-07-07 04:25:51,930 (trainer:732) INFO: 21epoch:train:7201-7300batch: iter_time=2.744e-04, forward_time=0.119, loss_ctc=71.583, loss_att=52.769, acc=0.697, loss=58.413, backward_time=0.752, grad_norm=92.085, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.961e-05, train_time=2.018 +[gpua003:0/64] 2023-07-07 04:27:31,763 (trainer:732) INFO: 21epoch:train:7301-7400batch: iter_time=9.744e-05, forward_time=0.109, loss_ctc=78.262, loss_att=56.400, acc=0.700, loss=62.958, backward_time=0.753, grad_norm=108.333, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.959e-05, train_time=1.996 +[gpua003:0/64] 2023-07-07 04:29:11,616 (trainer:732) INFO: 21epoch:train:7401-7500batch: iter_time=9.512e-05, forward_time=0.109, loss_ctc=72.795, loss_att=51.382, acc=0.699, loss=57.806, backward_time=0.752, grad_norm=106.912, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.957e-05, train_time=1.997 +[gpua003:0/64] 2023-07-07 04:29:19,879 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpua003:0/64] 2023-07-07 04:29:39,014 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 04:29:44,220 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 04:29:44,313 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpua003:0/64] 2023-07-07 04:29:44,320 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 04:34:49,745 (trainer:732) INFO: 21epoch:train:7501-7600batch: iter_time=1.925, forward_time=0.158, loss_ctc=69.045, loss_att=53.787, acc=0.717, loss=58.364, backward_time=0.770, grad_norm=91.648, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.116, optim0_lr0=7.955e-05, train_time=6.762
+[gpua003:0/64] 2023-07-07 04:36:30,383 (trainer:732) INFO: 21epoch:train:7601-7700batch: iter_time=9.981e-05, forward_time=0.109, loss_ctc=65.367, loss_att=51.408, acc=0.707, loss=55.596, backward_time=0.753, grad_norm=96.250, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.952e-05, train_time=2.013
+[gpua003:0/64] 2023-07-07 04:38:11,636 (trainer:732) INFO: 21epoch:train:7701-7800batch: iter_time=1.030e-04, forward_time=0.110, loss_ctc=65.290, loss_att=46.653, acc=0.718, loss=52.244, backward_time=0.752, grad_norm=97.699, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.112, optim0_lr0=7.950e-05, train_time=2.025
+[gpua003:0/64] 2023-07-07 04:39:52,657 (trainer:732) INFO: 21epoch:train:7801-7900batch: iter_time=9.248e-05, forward_time=0.116, loss_ctc=79.587, loss_att=57.577, acc=0.697, loss=64.180, backward_time=0.752, grad_norm=90.219, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.112, optim0_lr0=7.948e-05, train_time=2.020
+[gpua003:0/64] 2023-07-07 04:41:35,649 (trainer:732) INFO: 21epoch:train:7901-8000batch: iter_time=6.759e-04, forward_time=0.130, loss_ctc=75.540, loss_att=56.684, acc=0.718, loss=62.341, backward_time=0.758, grad_norm=87.590, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.946e-05, train_time=2.060
+[gpua003:0/64] 2023-07-07 04:43:22,167 (trainer:732) INFO: 21epoch:train:8001-8100batch: iter_time=1.011e-04, forward_time=0.149, loss_ctc=71.159, loss_att=52.801, acc=0.693, loss=58.309, backward_time=0.770, grad_norm=152.348, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.116, optim0_lr0=7.944e-05, train_time=2.130
+[gpua003:0/64] 2023-07-07 04:45:05,978 (trainer:732) INFO: 21epoch:train:8101-8200batch: iter_time=9.418e-05, forward_time=0.140, loss_ctc=76.922, loss_att=55.656, acc=0.697, loss=62.036, backward_time=0.756, grad_norm=115.315, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.114, optim0_lr0=7.942e-05, train_time=2.076
+[gpua003:0/64] 2023-07-07 04:46:48,173 (trainer:732) INFO: 21epoch:train:8201-8300batch: iter_time=1.139e-04, forward_time=0.112, loss_ctc=71.045, loss_att=50.537, acc=0.700, loss=56.690, backward_time=0.751, grad_norm=97.377, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.940e-05, train_time=2.044
+[gpua003:0/64] 2023-07-07 04:47:40,909 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpua003:0/64] 2023-07-07 04:48:00,341 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 04:48:04,000 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 04:48:04,001 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpua003:0/64] 2023-07-07 04:48:04,007 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 04:53:34,303 (trainer:732) INFO: 21epoch:train:8301-8400batch: iter_time=2.928, forward_time=0.156, loss_ctc=74.000, loss_att=62.198, acc=0.690, loss=65.739, backward_time=0.775, grad_norm=93.122, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.938e-05, train_time=8.122
+[gpua003:0/64] 2023-07-07 04:55:17,196 (trainer:732) INFO: 21epoch:train:8401-8500batch: iter_time=1.028e-04, forward_time=0.111, loss_ctc=65.302, loss_att=52.812, acc=0.700, loss=56.559, backward_time=0.756, grad_norm=78.595, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.936e-05, train_time=2.058
+[gpua003:0/64] 2023-07-07 04:56:57,424 (trainer:732) INFO: 21epoch:train:8501-8600batch: iter_time=9.658e-05, forward_time=0.108, loss_ctc=68.684, loss_att=51.115, acc=0.721, loss=56.385, backward_time=0.750, grad_norm=82.716, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.934e-05, train_time=2.004
+[gpua003:0/64] 2023-07-07 04:58:38,276 (trainer:732) INFO: 21epoch:train:8601-8700batch: iter_time=1.075e-04, forward_time=0.108, loss_ctc=69.325, loss_att=51.087, acc=0.707, loss=56.559, backward_time=0.752, grad_norm=99.216, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.932e-05, train_time=2.017
+[gpua003:0/64] 2023-07-07 05:00:18,316 (trainer:732) INFO: 21epoch:train:8701-8800batch: iter_time=9.697e-05, forward_time=0.109, loss_ctc=73.945, loss_att=57.455, acc=0.694, loss=62.402, backward_time=0.753, grad_norm=89.756, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.930e-05, train_time=2.001
+[gpua003:0/64] 2023-07-07 05:01:58,070 (trainer:732) INFO: 21epoch:train:8801-8900batch: iter_time=1.013e-04, forward_time=0.109, loss_ctc=70.106, loss_att=52.451, acc=0.702, loss=57.747, backward_time=0.753, grad_norm=85.297, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.114, optim0_lr0=7.928e-05, train_time=1.995
+[gpua003:0/64] 2023-07-07 05:03:37,866 (trainer:732) INFO: 21epoch:train:8901-9000batch: iter_time=9.608e-05, forward_time=0.108, loss_ctc=76.088, loss_att=58.045, acc=0.688, loss=63.458, backward_time=0.752, grad_norm=106.493, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.926e-05, train_time=1.996
+[gpua003:0/64] 2023-07-07 05:05:17,660 (trainer:732) INFO: 21epoch:train:9001-9100batch: iter_time=9.909e-05, forward_time=0.108, loss_ctc=80.044, loss_att=57.867, acc=0.695, loss=64.520, backward_time=0.752, grad_norm=118.907, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.113, optim0_lr0=7.924e-05, train_time=1.996
+[gpua003:0/64] 2023-07-07 05:06:43,861 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpua003:0/64] 2023-07-07 05:07:03,332 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 05:07:07,163 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 05:07:07,164 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpua003:0/64] 2023-07-07 05:07:07,170 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 05:11:58,302 (trainer:732) INFO: 21epoch:train:9101-9200batch: iter_time=1.887, forward_time=0.165, loss_ctc=73.392, loss_att=56.440, acc=0.687, loss=61.525, backward_time=0.766, grad_norm=116.633, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.115, optim0_lr0=7.922e-05, train_time=8.012
+[gpua003:0/64] 2023-07-07 05:13:38,749 (trainer:732) INFO: 21epoch:train:9201-9300batch: iter_time=9.300e-05, forward_time=0.107, loss_ctc=67.815, loss_att=50.596, acc=0.716, loss=55.762, backward_time=0.753, grad_norm=79.997, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.112, optim0_lr0=7.920e-05, train_time=2.009
+[gpua003:0/64] 2023-07-07 05:15:19,209 (trainer:732) INFO: 21epoch:train:9301-9400batch: iter_time=1.019e-04, forward_time=0.107, loss_ctc=66.582, loss_att=52.925, acc=0.708, loss=57.022, backward_time=0.752, grad_norm=93.157, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.112, optim0_lr0=7.919e-05, train_time=2.009
+[gpua003:0/64] 2023-07-07 05:16:59,576 (trainer:732) INFO: 21epoch:train:9401-9500batch: iter_time=9.374e-05, forward_time=0.107, loss_ctc=61.813, loss_att=45.211, acc=0.710, loss=50.192, backward_time=0.752, grad_norm=86.096, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.112, optim0_lr0=7.917e-05, train_time=2.007
+[gpua003:0/64] 2023-07-07 05:18:40,211 (trainer:732) INFO: 21epoch:train:9501-9600batch: iter_time=9.173e-05, forward_time=0.107, loss_ctc=81.797, loss_att=61.241, acc=0.697, loss=67.408, backward_time=0.752, grad_norm=95.878, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.111, optim0_lr0=7.915e-05, train_time=2.012
+[gpua003:0/64] 2023-07-07 05:20:20,733 (trainer:732) INFO: 21epoch:train:9601-9700batch: iter_time=9.476e-05, forward_time=0.107, loss_ctc=72.431, loss_att=53.032, acc=0.704, loss=58.852, backward_time=0.752, grad_norm=86.451, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.112, optim0_lr0=7.913e-05, train_time=2.010
+[gpua003:0/64] 2023-07-07 05:22:00,233 (trainer:732) INFO: 21epoch:train:9701-9800batch: iter_time=9.436e-05, forward_time=0.107, loss_ctc=72.639, loss_att=53.426, acc=0.691, loss=59.190, backward_time=0.751, grad_norm=95.931, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.112, optim0_lr0=7.911e-05, train_time=1.990
+[gpua003:0/64] 2023-07-07 05:23:39,817 (trainer:732) INFO: 21epoch:train:9801-9900batch: iter_time=8.872e-05, forward_time=0.108, loss_ctc=76.251, loss_att=59.146, acc=0.691, loss=64.277, backward_time=0.752, grad_norm=114.109, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.112, optim0_lr0=7.909e-05, train_time=1.991
+[gpua003:0/64] 2023-07-07 05:25:19,445 (trainer:732) INFO: 21epoch:train:9901-10000batch: iter_time=9.037e-05, forward_time=0.107, loss_ctc=73.224, loss_att=50.857, acc=0.694, loss=57.567, backward_time=0.752, grad_norm=105.676, clip=100.000, loss_scale=3.689e+19, optim_step_time=0.112, optim0_lr0=7.907e-05, train_time=1.992
+[gpua003:0/64] 2023-07-07 05:38:40,911 (trainer:338) INFO: 21epoch results: [train] iter_time=0.235, forward_time=0.113, loss_ctc=72.893, loss_att=54.497, acc=0.699, loss=60.016, backward_time=0.755, grad_norm=97.715, clip=100.000, loss_scale=2.398e+19, optim_step_time=0.113, optim0_lr0=8.007e-05, train_time=2.656, time=3 hours, 41 minutes and 44.21 seconds, total_count=180000, gpu_max_cached_mem_GB=37.779, [valid] loss_ctc=50.186, cer_ctc=0.290, loss_att=41.004, acc=0.654, cer=0.409, wer=0.994, loss=43.759, time=6 minutes and 51.3 seconds, total_count=18722, gpu_max_cached_mem_GB=37.779, [att_plot] time=6 minutes and 7.7 seconds, total_count=0, gpu_max_cached_mem_GB=37.779
+[gpua003:0/64] 2023-07-07 05:38:59,977 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpua003:0/64] 2023-07-07 05:39:00,026 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/14epoch.pth
+[gpua003:0/64] 2023-07-07 05:39:00,083 (trainer:272) INFO: 22/100epoch started. Estimated time to finish: 1 week, 5 days and 17 hours
+[gpua003:0/64] 2023-07-07 05:39:01,612 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpua003:0/64] 2023-07-07 05:39:20,669 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 05:39:24,275 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 05:39:24,275 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpua003:0/64] 2023-07-07 05:39:24,379 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 05:46:09,882 (trainer:732) INFO: 22epoch:train:1-100batch: iter_time=3.232, forward_time=0.130, loss_ctc=75.959, loss_att=57.589, acc=0.700, loss=63.100, backward_time=0.770, grad_norm=94.575, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.114, optim0_lr0=7.905e-05, train_time=8.584
+[gpua003:0/64] 2023-07-07 05:47:51,050 (trainer:732) INFO: 22epoch:train:101-200batch: iter_time=9.855e-05, forward_time=0.109, loss_ctc=68.208, loss_att=52.446, acc=0.682, loss=57.174, backward_time=0.756, grad_norm=97.364, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.114, optim0_lr0=7.903e-05, train_time=2.023
+[gpua003:0/64] 2023-07-07 05:49:30,923 (trainer:732) INFO: 22epoch:train:201-300batch: iter_time=9.754e-05, forward_time=0.110, loss_ctc=76.840, loss_att=59.608, acc=0.719, loss=64.777, backward_time=0.753, grad_norm=95.370, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.114, optim0_lr0=7.901e-05, train_time=1.997
+[gpua003:0/64] 2023-07-07 05:51:10,776 (trainer:732) INFO: 22epoch:train:301-400batch: iter_time=9.949e-05, forward_time=0.110, loss_ctc=77.024, loss_att=65.093, acc=0.681, loss=68.673, backward_time=0.752, grad_norm=106.966, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.114, optim0_lr0=7.899e-05, train_time=1.997
+[gpua003:0/64] 2023-07-07 05:52:55,638 (trainer:732) INFO: 22epoch:train:401-500batch: iter_time=9.878e-05, forward_time=0.109, loss_ctc=70.205, loss_att=56.739, acc=0.696, loss=60.779, backward_time=0.761, grad_norm=90.649, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.897e-05, train_time=2.097
+[gpua003:0/64] 2023-07-07 05:54:39,214 (trainer:732) INFO: 22epoch:train:501-600batch: iter_time=1.011e-04, forward_time=0.109, loss_ctc=67.033, loss_att=51.629, acc=0.698, loss=56.250, backward_time=0.754, grad_norm=93.014, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.114, optim0_lr0=7.895e-05, train_time=2.071
+[gpua003:0/64] 2023-07-07 05:56:20,364 (trainer:732) INFO: 22epoch:train:601-700batch: iter_time=1.016e-04, forward_time=0.109, loss_ctc=65.903, loss_att=46.926, acc=0.685, loss=52.619, backward_time=0.751, grad_norm=81.828, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.893e-05, train_time=2.023
+[gpua003:0/64] 2023-07-07 05:58:00,763 (trainer:732) INFO: 22epoch:train:701-800batch: iter_time=9.919e-05, forward_time=0.108, loss_ctc=77.598, loss_att=57.244, acc=0.698, loss=63.350, backward_time=0.751, grad_norm=97.156, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.891e-05, train_time=2.008
+[gpua003:0/64] 2023-07-07 05:58:40,682 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpua003:0/64] 2023-07-07 05:58:59,667 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 05:59:03,212 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 05:59:03,212 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpua003:0/64] 2023-07-07 05:59:03,218 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 06:03:49,036 (trainer:732) INFO: 22epoch:train:801-900batch: iter_time=1.356, forward_time=0.139, loss_ctc=73.151, loss_att=56.666, acc=0.700, loss=61.612, backward_time=0.769, grad_norm=98.640, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.115, optim0_lr0=7.889e-05, train_time=6.965
+[gpua003:0/64] 2023-07-07 06:05:29,630 (trainer:732) INFO: 22epoch:train:901-1000batch: iter_time=9.770e-05, forward_time=0.110, loss_ctc=68.300, loss_att=50.098, acc=0.687, loss=55.559, backward_time=0.754, grad_norm=100.634, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.887e-05, train_time=2.012
+[gpua003:0/64] 2023-07-07 06:07:09,505 (trainer:732) INFO: 22epoch:train:1001-1100batch: iter_time=1.005e-04, forward_time=0.108, loss_ctc=73.251, loss_att=59.729, acc=0.702, loss=63.786, backward_time=0.752, grad_norm=99.591, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.885e-05, train_time=1.997
+[gpua003:0/64] 2023-07-07 06:08:52,104 (trainer:732) INFO: 22epoch:train:1101-1200batch: iter_time=1.038e-04, forward_time=0.109, loss_ctc=71.657, loss_att=55.743, acc=0.704, loss=60.517, backward_time=0.765, grad_norm=85.238, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.883e-05, train_time=2.052
+[gpua003:0/64] 2023-07-07 06:10:32,190 (trainer:732) INFO: 22epoch:train:1201-1300batch: iter_time=1.035e-04, forward_time=0.109, loss_ctc=76.079, loss_att=65.762, acc=0.682, loss=68.857, backward_time=0.753, grad_norm=102.836, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.881e-05, train_time=2.001
+[gpua003:0/64] 2023-07-07 06:12:12,065 (trainer:732) INFO: 22epoch:train:1301-1400batch: iter_time=9.626e-05, forward_time=0.109, loss_ctc=69.609, loss_att=55.901, acc=0.688, loss=60.014, backward_time=0.753, grad_norm=115.398, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.879e-05, train_time=1.997
+[gpua003:0/64] 2023-07-07 06:13:52,048 (trainer:732) INFO: 22epoch:train:1401-1500batch: iter_time=8.880e-05, forward_time=0.109, loss_ctc=61.548, loss_att=44.239, acc=0.684, loss=49.431, backward_time=0.753, grad_norm=108.769, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.877e-05, train_time=1.999
+[gpua003:0/64] 2023-07-07 06:15:31,817 (trainer:732) INFO: 22epoch:train:1501-1600batch: iter_time=9.085e-05, forward_time=0.109, loss_ctc=73.610, loss_att=51.494, acc=0.698, loss=58.129, backward_time=0.753, grad_norm=95.261, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.875e-05, train_time=1.995
+[gpua003:0/64] 2023-07-07 06:16:54,392 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpua003:0/64] 2023-07-07 06:17:13,867 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 06:17:17,499 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 06:17:17,499 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpua003:0/64] 2023-07-07 06:17:17,506 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 06:21:41,543 (trainer:732) INFO: 22epoch:train:1601-1700batch: iter_time=2.544, forward_time=0.118, loss_ctc=71.770, loss_att=56.335, acc=0.704, loss=60.965, backward_time=0.763, grad_norm=88.321, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.873e-05, train_time=7.394
+[gpua003:0/64] 2023-07-07 06:23:24,648 (trainer:732) INFO: 22epoch:train:1701-1800batch: iter_time=8.832e-05, forward_time=0.108, loss_ctc=72.678, loss_att=52.151, acc=0.698, loss=58.309, backward_time=0.758, grad_norm=95.685, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.871e-05, train_time=2.062
+[gpua003:0/64] 2023-07-07 06:25:06,463 (trainer:732) INFO: 22epoch:train:1801-1900batch: iter_time=9.287e-05, forward_time=0.122, loss_ctc=66.852, loss_att=52.819, acc=0.716, loss=57.029, backward_time=0.755, grad_norm=93.289, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.115, optim0_lr0=7.869e-05, train_time=2.036
+[gpua003:0/64] 2023-07-07 06:26:47,146 (trainer:732) INFO: 22epoch:train:1901-2000batch: iter_time=1.001e-04, forward_time=0.114, loss_ctc=77.777, loss_att=64.279, acc=0.708, loss=68.329, backward_time=0.754, grad_norm=107.242, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.115, optim0_lr0=7.867e-05, train_time=2.013
+[gpua003:0/64] 2023-07-07 06:28:27,163 (trainer:732) INFO: 22epoch:train:2001-2100batch: iter_time=9.088e-05, forward_time=0.109, loss_ctc=75.746, loss_att=62.285, acc=0.691, loss=66.323, backward_time=0.754, grad_norm=93.071, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.865e-05, train_time=2.000
+[gpua003:0/64] 2023-07-07 06:30:09,221 (trainer:732) INFO: 22epoch:train:2101-2200batch: iter_time=8.882e-05, forward_time=0.123, loss_ctc=70.706, loss_att=58.251, acc=0.693, loss=61.987, backward_time=0.758, grad_norm=99.862, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.863e-05, train_time=2.041
+[gpua003:0/64] 2023-07-07 06:31:49,013 (trainer:732) INFO: 22epoch:train:2201-2300batch: iter_time=8.759e-05, forward_time=0.108, loss_ctc=56.761, loss_att=45.017, acc=0.696, loss=48.540, backward_time=0.752, grad_norm=80.159, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.862e-05, train_time=1.996
+[gpua003:0/64] 2023-07-07 06:33:28,852 (trainer:732) INFO: 22epoch:train:2301-2400batch: iter_time=9.062e-05, forward_time=0.108, loss_ctc=69.849, loss_att=49.995, acc=0.703, loss=55.951, backward_time=0.751, grad_norm=112.615, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.860e-05, train_time=1.997
+[gpua003:0/64] 2023-07-07 06:35:09,707 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpua003:0/64] 2023-07-07 06:35:28,850 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 06:35:32,446 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 06:35:32,446 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3,
+[gpua003:0/64] 2023-07-07 06:35:32,453 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 06:39:06,065 (trainer:732) INFO: 22epoch:train:2401-2500batch: iter_time=1.292, forward_time=0.109, loss_ctc=71.798, loss_att=50.317, acc=0.717, loss=56.761, backward_time=0.759, grad_norm=95.019, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.858e-05, train_time=6.744
+[gpua003:0/64] 2023-07-07 06:40:51,328 (trainer:732) INFO: 22epoch:train:2501-2600batch: iter_time=1.031e-04, forward_time=0.109, loss_ctc=75.859, loss_att=56.028, acc=0.710, loss=61.978, backward_time=0.760, grad_norm=95.952, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.856e-05, train_time=2.105
+[gpua003:0/64] 2023-07-07 06:42:31,354 (trainer:732) INFO: 22epoch:train:2601-2700batch: iter_time=1.082e-04, forward_time=0.108, loss_ctc=66.166, loss_att=50.393, acc=0.695, loss=55.125, backward_time=0.751, grad_norm=86.983, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.854e-05, train_time=2.000
+[gpua003:0/64] 2023-07-07 06:44:10,962 (trainer:732) INFO: 22epoch:train:2701-2800batch: iter_time=1.081e-04, forward_time=0.108, loss_ctc=77.042, loss_att=58.748, acc=0.721, loss=64.236, backward_time=0.750, grad_norm=99.652, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.852e-05, train_time=1.992
+[gpua003:0/64] 2023-07-07 06:45:50,837 (trainer:732) INFO: 22epoch:train:2801-2900batch: iter_time=1.088e-04, forward_time=0.109, loss_ctc=73.184, loss_att=61.853, acc=0.694, loss=65.253, backward_time=0.752, grad_norm=101.544, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.850e-05, train_time=1.997
+[gpua003:0/64] 2023-07-07 06:47:30,553 (trainer:732) INFO: 22epoch:train:2901-3000batch: iter_time=1.154e-04, forward_time=0.109, loss_ctc=70.952, loss_att=57.515, acc=0.703, loss=61.546, backward_time=0.751, grad_norm=97.436, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.848e-05, train_time=1.994
+[gpua003:0/64] 2023-07-07 06:49:10,499 (trainer:732) INFO: 22epoch:train:3001-3100batch: iter_time=1.189e-04, forward_time=0.109, loss_ctc=62.039, loss_att=47.304, acc=0.712, loss=51.724, backward_time=0.753, grad_norm=90.810, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.846e-05, train_time=1.999
+[gpua003:0/64] 2023-07-07 06:50:50,138 (trainer:732) INFO: 22epoch:train:3101-3200batch: iter_time=1.267e-04, forward_time=0.109, loss_ctc=66.663, loss_att=46.048, acc=0.693, loss=52.232, backward_time=0.750, grad_norm=86.756, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.844e-05, train_time=1.993
+[gpua003:0/64] 2023-07-07 06:52:32,525 (trainer:732) INFO: 22epoch:train:3201-3300batch: iter_time=1.227e-04, forward_time=0.110, loss_ctc=72.678, loss_att=54.618, acc=0.711, loss=60.036, backward_time=0.757, grad_norm=104.682, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.842e-05, train_time=2.048
+[gpua003:0/64] 2023-07-07 06:53:06,741 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpua003:0/64] 2023-07-07 06:53:26,004 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 06:53:29,609 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 06:53:29,609 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpua003:0/64] 2023-07-07 06:53:30,022 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 06:57:58,983 (trainer:732) INFO: 22epoch:train:3301-3400batch: iter_time=1.348, forward_time=0.145, loss_ctc=72.094, loss_att=56.366, acc=0.703, loss=61.085, backward_time=0.769, grad_norm=89.951, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.114, optim0_lr0=7.840e-05, train_time=6.529
+[gpua003:0/64] 2023-07-07 06:59:39,298 (trainer:732) INFO: 22epoch:train:3401-3500batch: iter_time=1.038e-04, forward_time=0.110, loss_ctc=66.447, loss_att=49.854, acc=0.686, loss=54.832, backward_time=0.753, grad_norm=93.159, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.112, optim0_lr0=7.838e-05, train_time=2.006
+[gpua003:0/64] 2023-07-07 07:01:19,264 (trainer:732) INFO: 22epoch:train:3501-3600batch: iter_time=9.217e-05, forward_time=0.109, loss_ctc=75.507, loss_att=61.068, acc=0.704, loss=65.400, backward_time=0.751, grad_norm=96.972, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.112, optim0_lr0=7.836e-05, train_time=1.999
+[gpua003:0/64] 2023-07-07 07:02:59,198 (trainer:732) INFO: 22epoch:train:3601-3700batch: iter_time=1.088e-04, forward_time=0.109, loss_ctc=70.999, loss_att=57.684, acc=0.705, loss=61.678, backward_time=0.752, grad_norm=85.253, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.112, optim0_lr0=7.834e-05, train_time=1.998
+[gpua003:0/64] 2023-07-07 07:04:38,930 (trainer:732) INFO: 22epoch:train:3701-3800batch: iter_time=1.136e-04, forward_time=0.109, loss_ctc=72.958, loss_att=61.007, acc=0.688, loss=64.592, backward_time=0.750, grad_norm=110.731, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.112, optim0_lr0=7.833e-05, train_time=1.994
+[gpua003:0/64] 2023-07-07 07:06:18,662 (trainer:732) INFO: 22epoch:train:3801-3900batch: iter_time=1.150e-04, forward_time=0.109, loss_ctc=69.973, loss_att=57.379, acc=0.688, loss=61.157, backward_time=0.751, grad_norm=104.773, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.113, optim0_lr0=7.831e-05, train_time=1.994
+[gpua003:0/64] 2023-07-07 07:07:58,470 (trainer:732) INFO: 22epoch:train:3901-4000batch: iter_time=1.153e-04, forward_time=0.109, loss_ctc=58.578, loss_att=42.394, acc=0.697, loss=47.249, backward_time=0.752, grad_norm=78.336, clip=100.000, loss_scale=7.379e+19, optim_step_time=0.112, optim0_lr0=7.829e-05, train_time=1.996
+[gpua003:0/64] 2023-07-07 07:09:38,304 (trainer:732) INFO: 22epoch:train:4001-4100batch: iter_time=9.816e-05, forward_time=0.108, loss_ctc=72.183, loss_att=51.473, acc=0.699, loss=57.686, backward_time=0.753, grad_norm=90.776, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.827e-05, train_time=1.996
+[gpua003:0/64] 2023-07-07 07:10:44,632 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpua003:0/64] 2023-07-07 07:11:04,150 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 07:11:07,729 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 07:11:07,729 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpua003:0/64] 2023-07-07 07:11:07,735 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 07:14:13,080 (trainer:732) INFO: 22epoch:train:4101-4200batch: iter_time=1.318, forward_time=0.108, loss_ctc=71.941, loss_att=54.810, acc=0.713, loss=59.949, backward_time=0.764, grad_norm=86.300, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.112, optim0_lr0=7.825e-05, train_time=5.495
+[gpua003:0/64] 2023-07-07 07:15:54,008 (trainer:732) INFO: 22epoch:train:4201-4300batch: iter_time=1.036e-04, forward_time=0.109, loss_ctc=70.358, loss_att=50.448, acc=0.689, loss=56.421, backward_time=0.755, grad_norm=99.875, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.112, optim0_lr0=7.823e-05, train_time=2.018
+[gpua003:0/64] 2023-07-07 07:17:35,127 (trainer:732) INFO: 22epoch:train:4301-4400batch: iter_time=1.074e-04, forward_time=0.108, loss_ctc=70.078, loss_att=57.577, acc=0.697, loss=61.327, backward_time=0.751, grad_norm=93.447, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.112, optim0_lr0=7.821e-05, train_time=2.022
+[gpua003:0/64] 2023-07-07 07:19:17,039 (trainer:732) INFO: 22epoch:train:4401-4500batch: iter_time=8.866e-05, forward_time=0.108, loss_ctc=73.720, loss_att=60.764, acc=0.703, loss=64.650, backward_time=0.759, grad_norm=98.011, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.819e-05, train_time=2.038
+[gpua003:0/64] 2023-07-07 07:20:57,799 (trainer:732) INFO: 22epoch:train:4501-4600batch: iter_time=9.315e-05, forward_time=0.110, loss_ctc=74.002, loss_att=60.934, acc=0.685, loss=64.855, backward_time=0.754, grad_norm=99.614, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.817e-05, train_time=2.015
+[gpua003:0/64] 2023-07-07 07:22:39,059 (trainer:732) INFO: 22epoch:train:4601-4700batch: iter_time=1.059e-04, forward_time=0.109, loss_ctc=69.200, loss_att=56.517, acc=0.691, loss=60.322, backward_time=0.752, grad_norm=101.222, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.815e-05, train_time=2.025
+[gpua003:0/64] 2023-07-07 07:24:20,772 (trainer:732) INFO: 22epoch:train:4701-4800batch: iter_time=1.095e-04, forward_time=0.109, loss_ctc=61.538, loss_att=45.833, acc=0.689, loss=50.545, backward_time=0.754, grad_norm=95.781, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.813e-05, train_time=2.034
+[gpua003:0/64] 2023-07-07 07:26:02,965 (trainer:732) INFO: 22epoch:train:4801-4900batch: iter_time=1.033e-04, forward_time=0.109, loss_ctc=69.082, loss_att=51.514, acc=0.696, loss=56.785, backward_time=0.752, grad_norm=95.683, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.112, optim0_lr0=7.811e-05, train_time=2.044
+[gpua003:0/64] 2023-07-07 07:27:49,767 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpua003:0/64] 2023-07-07 07:28:09,330 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 07:28:12,926 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 07:28:12,926 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpua003:0/64] 2023-07-07 07:28:12,933 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 07:31:57,955 (trainer:732) INFO: 22epoch:train:4901-5000batch: iter_time=2.503, forward_time=0.127, loss_ctc=69.914, loss_att=52.926, acc=0.705, loss=58.022, backward_time=0.762, grad_norm=90.567, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.810e-05, train_time=7.100
+[gpua003:0/64] 2023-07-07 07:33:40,006 (trainer:732) INFO: 22epoch:train:5001-5100batch: iter_time=1.018e-04, forward_time=0.109, loss_ctc=75.894, loss_att=56.058, acc=0.706, loss=62.009, backward_time=0.760, grad_norm=95.605, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.808e-05, train_time=2.041
+[gpua003:0/64] 2023-07-07 07:35:20,731 (trainer:732) INFO: 22epoch:train:5101-5200batch: iter_time=1.041e-04, forward_time=0.108, loss_ctc=65.260, loss_att=50.518, acc=0.693, loss=54.941, backward_time=0.754, grad_norm=95.597, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.806e-05, train_time=2.014
+[gpua003:0/64] 2023-07-07 07:37:03,337 (trainer:732) INFO: 22epoch:train:5201-5300batch: iter_time=9.542e-05, forward_time=0.109, loss_ctc=75.495, loss_att=58.850, acc=0.712, loss=63.844, backward_time=0.755, grad_norm=96.146, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.804e-05, train_time=2.052
+[gpua003:0/64] 2023-07-07 07:38:52,102 (trainer:732) INFO: 22epoch:train:5301-5400batch: iter_time=9.699e-05, forward_time=0.109, loss_ctc=72.248, loss_att=62.576, acc=0.684, loss=65.477, backward_time=0.759, grad_norm=109.912, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.802e-05, train_time=2.175
+[gpua003:0/64] 2023-07-07 07:40:47,676 (trainer:732) INFO: 22epoch:train:5401-5500batch: iter_time=9.981e-05, forward_time=0.108, loss_ctc=69.974, loss_att=56.541, acc=0.690, loss=60.571, backward_time=0.802, grad_norm=96.604, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.112, optim0_lr0=7.800e-05, train_time=2.311
+[gpua003:0/64] 2023-07-07 07:42:29,427 (trainer:732) INFO: 22epoch:train:5501-5600batch: iter_time=1.005e-04, forward_time=0.108, loss_ctc=62.758, loss_att=47.686, acc=0.702, loss=52.208, backward_time=0.762, grad_norm=88.346, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.112, optim0_lr0=7.798e-05, train_time=2.035
+[gpua003:0/64] 2023-07-07 07:44:09,193 (trainer:732) INFO: 22epoch:train:5601-5700batch: iter_time=1.001e-04, forward_time=0.109, loss_ctc=65.100, loss_att=45.891, acc=0.696, loss=51.654, backward_time=0.751, grad_norm=81.829, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.112, optim0_lr0=7.796e-05, train_time=1.995
+[gpua003:0/64] 2023-07-07 07:45:53,335 (trainer:732) INFO: 22epoch:train:5701-5800batch: iter_time=9.999e-05, forward_time=0.108, loss_ctc=74.308, loss_att=57.186, acc=0.695, loss=62.323, backward_time=0.767, grad_norm=150.463, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.112, optim0_lr0=7.794e-05, train_time=2.083
+[gpua003:0/64] 2023-07-07 07:46:33,767 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpua003:0/64] 2023-07-07 07:46:52,851 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 07:46:56,470 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 07:46:56,470 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpua003:0/64] 2023-07-07 07:46:56,477 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 07:50:29,089 (trainer:732) INFO: 22epoch:train:5801-5900batch: iter_time=1.584, forward_time=0.131, loss_ctc=74.207, loss_att=54.322, acc=0.717, loss=60.288, backward_time=0.764, grad_norm=91.726, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.792e-05, train_time=5.515
+[gpua003:0/64] 2023-07-07 07:52:09,734 (trainer:732) INFO: 22epoch:train:5901-6000batch: iter_time=9.734e-05, forward_time=0.110, loss_ctc=65.678, loss_att=47.794, acc=0.691, loss=53.160, backward_time=0.752, grad_norm=83.021, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.791e-05, train_time=2.013
+[gpua003:0/64] 2023-07-07 07:53:50,393 (trainer:732) INFO: 22epoch:train:6001-6100batch: iter_time=9.193e-05, forward_time=0.112, loss_ctc=74.747, loss_att=60.495, acc=0.721, loss=64.771, backward_time=0.755, grad_norm=90.014, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.789e-05, train_time=2.013
+[gpua003:0/64] 2023-07-07 07:55:31,643 (trainer:732) INFO: 22epoch:train:6101-6200batch: iter_time=9.239e-05, forward_time=0.119, loss_ctc=73.282, loss_att=60.349, acc=0.698, loss=64.229, backward_time=0.756, grad_norm=88.930, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.787e-05, train_time=2.025
+[gpua003:0/64] 2023-07-07 07:57:11,271 (trainer:732) INFO: 22epoch:train:6201-6300batch: iter_time=9.293e-05, forward_time=0.108, loss_ctc=67.412, loss_att=54.664, acc=0.707, loss=58.488, backward_time=0.751, grad_norm=95.356, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.785e-05, train_time=1.992
+[gpua003:0/64] 2023-07-07 07:58:51,149 (trainer:732) INFO: 22epoch:train:6301-6400batch: iter_time=9.666e-05, forward_time=0.109, loss_ctc=66.473, loss_att=51.695, acc=0.714, loss=56.129, backward_time=0.751, grad_norm=103.423, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.783e-05, train_time=1.997
+[gpua003:0/64] 2023-07-07 08:00:31,053 (trainer:732) INFO: 22epoch:train:6401-6500batch: iter_time=9.940e-05, forward_time=0.110, loss_ctc=61.632, loss_att=44.108, acc=0.695, loss=49.365, backward_time=0.752, grad_norm=86.270, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.781e-05, train_time=1.998
+[gpua003:0/64] 2023-07-07 08:02:10,665 (trainer:732) INFO: 22epoch:train:6501-6600batch: iter_time=1.027e-04, forward_time=0.109, loss_ctc=72.139, loss_att=51.933, acc=0.712, loss=57.995, backward_time=0.750, grad_norm=96.771, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.779e-05, train_time=1.992
+[gpua003:0/64] 2023-07-07 08:03:18,465 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpua003:0/64] 2023-07-07 08:03:37,874 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 08:03:41,509 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 08:03:41,509 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpua003:0/64] 2023-07-07 08:03:41,515 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 08:07:00,984 (trainer:732) INFO: 22epoch:train:6601-6700batch: iter_time=1.390, forward_time=0.109, loss_ctc=70.332, loss_att=55.203, acc=0.706, loss=59.742, backward_time=0.765, grad_norm=97.451, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.777e-05, train_time=5.806
+[gpua003:0/64] 2023-07-07 08:08:45,065 (trainer:732) INFO: 22epoch:train:6701-6800batch: iter_time=9.209e-05, forward_time=0.108, loss_ctc=72.039, loss_att=50.836, acc=0.705, loss=57.197, backward_time=0.762, grad_norm=96.664, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.776e-05, train_time=2.081
+[gpua003:0/64] 2023-07-07 08:10:27,886 (trainer:732) INFO: 22epoch:train:6801-6900batch: iter_time=9.907e-05, forward_time=0.109, loss_ctc=66.153, loss_att=54.046, acc=0.709, loss=57.678, backward_time=0.754, grad_norm=91.951, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.774e-05, train_time=2.056
+[gpua003:0/64] 2023-07-07 08:12:07,728 (trainer:732) INFO: 22epoch:train:6901-7000batch: iter_time=1.036e-04, forward_time=0.109, loss_ctc=76.287, loss_att=59.045, acc=0.713, loss=64.218, backward_time=0.752, grad_norm=110.971, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.772e-05, train_time=1.997
+[gpua003:0/64] 2023-07-07 08:13:52,415 (trainer:732) INFO: 22epoch:train:7001-7100batch: iter_time=9.646e-05, forward_time=0.110, loss_ctc=72.777, loss_att=62.041, acc=0.682, loss=65.261, backward_time=0.770, grad_norm=98.939, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.770e-05, train_time=2.094
+[gpua003:0/64] 2023-07-07 08:15:37,878 (trainer:732) INFO: 22epoch:train:7101-7200batch: iter_time=9.875e-05, forward_time=0.109, loss_ctc=69.184, loss_att=56.850, acc=0.686, loss=60.550, backward_time=0.767, grad_norm=104.102, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.768e-05, train_time=2.109
+[gpua003:0/64] 2023-07-07 08:17:18,027 (trainer:732) INFO: 22epoch:train:7201-7300batch: iter_time=9.466e-05, forward_time=0.108, loss_ctc=56.723, loss_att=44.561, acc=0.694, loss=48.210, backward_time=0.751, grad_norm=82.134, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.766e-05, train_time=2.003
+[gpua003:0/64] 2023-07-07 08:19:11,959 (trainer:732) INFO: 22epoch:train:7301-7400batch: iter_time=9.554e-05, forward_time=0.109, loss_ctc=68.916, loss_att=48.970, acc=0.708, loss=54.954, backward_time=0.805, grad_norm=145.408, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.764e-05, train_time=2.278
+[gpua003:0/64] 2023-07-07 08:20:53,519 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpua003:0/64] 2023-07-07 08:21:12,743 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 08:21:16,313 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 08:21:16,313 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpua003:0/64] 2023-07-07 08:21:16,320 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 08:23:45,003 (trainer:732) INFO: 22epoch:train:7401-7500batch: iter_time=1.329, forward_time=0.136, loss_ctc=70.359, loss_att=50.850, acc=0.717, loss=56.703, backward_time=0.762, grad_norm=84.034, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.116, optim0_lr0=7.762e-05, train_time=5.461
+[gpua003:0/64] 2023-07-07 08:25:27,825 (trainer:732) INFO: 22epoch:train:7501-7600batch: iter_time=8.708e-05, forward_time=0.110, loss_ctc=76.765, loss_att=54.087, acc=0.711, loss=60.891, backward_time=0.763, grad_norm=98.850, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.761e-05, train_time=2.056
+[gpua003:0/64] 2023-07-07 08:27:08,334 (trainer:732) INFO: 22epoch:train:7601-7700batch: iter_time=1.042e-04, forward_time=0.109, loss_ctc=64.296, loss_att=50.883, acc=0.706, loss=54.907, backward_time=0.754, grad_norm=87.185, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.112, optim0_lr0=7.759e-05, train_time=2.010
+[gpua003:0/64] 2023-07-07 08:28:48,255 (trainer:732) INFO: 22epoch:train:7701-7800batch: iter_time=8.846e-05, forward_time=0.108, loss_ctc=74.565, loss_att=60.399, acc=0.719, loss=64.649, backward_time=0.752, grad_norm=95.857, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.757e-05, train_time=1.998
+[gpua003:0/64] 2023-07-07 08:30:28,045 (trainer:732) INFO: 22epoch:train:7801-7900batch: iter_time=1.037e-04, forward_time=0.109, loss_ctc=73.972, loss_att=62.893, acc=0.689, loss=66.217, backward_time=0.752, grad_norm=87.398, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.755e-05, train_time=1.996
+[gpua003:0/64] 2023-07-07 08:32:07,809 (trainer:732) INFO: 22epoch:train:7901-8000batch: iter_time=1.089e-04, forward_time=0.110, loss_ctc=70.642, loss_att=57.664, acc=0.693, loss=61.557, backward_time=0.752, grad_norm=92.959, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.753e-05, train_time=1.995
+[gpua003:0/64] 2023-07-07 08:33:47,492 (trainer:732) INFO: 22epoch:train:8001-8100batch: iter_time=1.063e-04, forward_time=0.109, loss_ctc=55.497, loss_att=44.132, acc=0.708, loss=47.541, backward_time=0.752, grad_norm=80.785, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.751e-05, train_time=1.993
+[gpua003:0/64] 2023-07-07 08:35:27,353 (trainer:732) INFO: 22epoch:train:8101-8200batch: iter_time=1.061e-04, forward_time=0.111, loss_ctc=67.689, loss_att=46.225, acc=0.704, loss=52.664, backward_time=0.752, grad_norm=103.286, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.749e-05, train_time=1.997
+[gpua003:0/64] 2023-07-07 08:37:07,180 (trainer:732) INFO: 22epoch:train:8201-8300batch: iter_time=1.080e-04, forward_time=0.111, loss_ctc=72.512, loss_att=53.632, acc=0.717, loss=59.296, backward_time=0.752, grad_norm=82.921, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.747e-05, train_time=1.996
+[gpua003:0/64] 2023-07-07 08:37:49,047 (multiple_iter_factory:32) INFO: Building 10th iter-factory...
+[gpua003:0/64] 2023-07-07 08:38:08,430 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 08:38:12,343 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 08:38:12,343 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpua003:0/64] 2023-07-07 08:38:12,349 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 08:43:42,647 (trainer:732) INFO: 22epoch:train:8301-8400batch: iter_time=2.865, forward_time=0.127, loss_ctc=76.998, loss_att=56.248, acc=0.715, loss=62.473, backward_time=0.764, grad_norm=91.921, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.746e-05, train_time=7.909
+[gpua003:0/64] 2023-07-07 08:45:23,857 (trainer:732) INFO: 22epoch:train:8401-8500batch: iter_time=9.852e-05, forward_time=0.109, loss_ctc=64.243, loss_att=47.232, acc=0.693, loss=52.335, backward_time=0.754, grad_norm=77.637, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.744e-05, train_time=2.024
+[gpua003:0/64] 2023-07-07 08:47:04,339 (trainer:732) INFO: 22epoch:train:8501-8600batch: iter_time=1.040e-04, forward_time=0.109, loss_ctc=74.869, loss_att=61.475, acc=0.714, loss=65.493, backward_time=0.753, grad_norm=93.438, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.112, optim0_lr0=7.742e-05, train_time=2.009
+[gpua003:0/64] 2023-07-07 08:48:43,954 (trainer:732) INFO: 22epoch:train:8601-8700batch: iter_time=1.034e-04, forward_time=0.108, loss_ctc=73.122, loss_att=61.953, acc=0.688, loss=65.304, backward_time=0.751, grad_norm=96.667, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.112, optim0_lr0=7.740e-05, train_time=1.992
+[gpua003:0/64] 2023-07-07 08:50:26,881 (trainer:732) INFO: 22epoch:train:8701-8800batch: iter_time=1.126e-04, forward_time=0.108, loss_ctc=66.145, loss_att=55.031, acc=0.694, loss=58.365, backward_time=0.754, grad_norm=94.273, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.112, optim0_lr0=7.738e-05, train_time=2.058
+[gpua003:0/64] 2023-07-07 08:52:06,631 (trainer:732) INFO: 22epoch:train:8801-8900batch: iter_time=1.047e-04, forward_time=0.108, loss_ctc=66.295, loss_att=51.963, acc=0.699, loss=56.262, backward_time=0.751, grad_norm=94.275, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.112, optim0_lr0=7.736e-05, train_time=1.995
+[gpua003:0/64] 2023-07-07 08:53:46,418 (trainer:732) INFO: 22epoch:train:8901-9000batch: iter_time=9.829e-05, forward_time=0.108, loss_ctc=61.909, loss_att=43.782, acc=0.697, loss=49.220, backward_time=0.752, grad_norm=82.673, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.734e-05, train_time=1.996
+[gpua003:0/64] 2023-07-07 08:55:26,132 (trainer:732) INFO: 22epoch:train:9001-9100batch: iter_time=1.043e-04, forward_time=0.108, loss_ctc=71.915, loss_att=53.753, acc=0.707, loss=59.202, backward_time=0.751, grad_norm=109.132, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.112, optim0_lr0=7.733e-05, train_time=1.994
+[gpua003:0/64] 2023-07-07 08:56:34,455 (multiple_iter_factory:32) INFO: Building 11th iter-factory...
+[gpua003:0/64] 2023-07-07 08:56:53,892 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 08:56:57,546 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 08:56:57,546 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpua003:0/64] 2023-07-07 08:56:57,553 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 09:00:31,061 (trainer:732) INFO: 22epoch:train:9101-9200batch: iter_time=1.431, forward_time=0.127, loss_ctc=71.039, loss_att=54.915, acc=0.705, loss=59.752, backward_time=0.761, grad_norm=106.663, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.731e-05, train_time=6.098
+[gpua003:0/64] 2023-07-07 09:02:14,178 (trainer:732) INFO: 22epoch:train:9201-9300batch: iter_time=1.042e-04, forward_time=0.122, loss_ctc=72.597, loss_att=52.275, acc=0.703, loss=58.372, backward_time=0.760, grad_norm=105.215, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.115, optim0_lr0=7.729e-05, train_time=2.062
+[gpua003:0/64] 2023-07-07 09:03:55,492 (trainer:732) INFO: 22epoch:train:9301-9400batch: iter_time=1.019e-04, forward_time=0.111, loss_ctc=66.483, loss_att=52.418, acc=0.717, loss=56.638, backward_time=0.755, grad_norm=102.064, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.114, optim0_lr0=7.727e-05, train_time=2.026
+[gpua003:0/64] 2023-07-07 09:05:35,862 (trainer:732) INFO: 22epoch:train:9401-9500batch: iter_time=1.021e-04, forward_time=0.110, loss_ctc=75.311, loss_att=58.987, acc=0.722, loss=63.884, backward_time=0.752, grad_norm=84.201, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.114, optim0_lr0=7.725e-05, train_time=2.007
+[gpua003:0/64] 2023-07-07 09:07:16,074 (trainer:732) INFO: 22epoch:train:9501-9600batch: iter_time=9.991e-05, forward_time=0.111, loss_ctc=71.239, loss_att=61.029, acc=0.693, loss=64.092, backward_time=0.754, grad_norm=112.479, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.723e-05, train_time=2.004
+[gpua003:0/64] 2023-07-07 09:08:55,956 (trainer:732) INFO: 22epoch:train:9601-9700batch: iter_time=1.011e-04, forward_time=0.111, loss_ctc=69.983, loss_att=56.365, acc=0.703, loss=60.450, backward_time=0.753, grad_norm=101.191, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.722e-05, train_time=1.997
+[gpua003:0/64] 2023-07-07 09:10:39,207 (trainer:732) INFO: 22epoch:train:9701-9800batch: iter_time=1.137e-04, forward_time=0.129, loss_ctc=55.743, loss_att=43.049, acc=0.703, loss=46.857, backward_time=0.756, grad_norm=90.349, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.124, optim0_lr0=7.720e-05, train_time=2.065
+[gpua003:0/64] 2023-07-07 09:12:19,264 (trainer:732) INFO: 22epoch:train:9801-9900batch: iter_time=9.062e-05, forward_time=0.110, loss_ctc=69.518, loss_att=49.134, acc=0.711, loss=55.249, backward_time=0.753, grad_norm=100.063, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.718e-05, train_time=2.001
+[gpua003:0/64] 2023-07-07 09:14:01,810 (trainer:732) INFO: 22epoch:train:9901-10000batch: iter_time=9.648e-05, forward_time=0.129, loss_ctc=69.398, loss_att=49.229, acc=0.721, loss=55.280, backward_time=0.756, grad_norm=90.245, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.716e-05, train_time=2.051
+[gpua003:0/64] 2023-07-07 09:26:51,892 (trainer:338) INFO: 22epoch results: [train] iter_time=0.222, forward_time=0.112, loss_ctc=70.052, loss_att=54.212, acc=0.701, loss=58.964, backward_time=0.757, grad_norm=96.262, clip=100.000, loss_scale=1.476e+20, optim_step_time=0.113, optim0_lr0=7.809e-05, train_time=2.580, time=3 hours, 35 minutes and 15.27 seconds, total_count=190000, gpu_max_cached_mem_GB=37.779, [valid] loss_ctc=53.056, cer_ctc=0.291, loss_att=42.969, acc=0.658, cer=0.388, wer=0.991, loss=45.995, time=6 minutes and 5.25 seconds, total_count=19734, gpu_max_cached_mem_GB=37.779, [att_plot] time=6 minutes and 31.11 seconds, total_count=0, gpu_max_cached_mem_GB=37.779
+[gpua003:0/64] 2023-07-07 09:27:11,470 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpua003:0/64] 2023-07-07 09:27:11,478 (trainer:272) INFO: 23/100epoch started. Estimated time to finish: 1 week, 5 days and 13 hours
+[gpua003:0/64] 2023-07-07 09:27:12,495 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpua003:0/64] 2023-07-07 09:27:32,952 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 09:27:36,862 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 09:27:36,865 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpua003:0/64] 2023-07-07 09:27:36,955 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 09:34:04,979 (trainer:732) INFO: 23epoch:train:1-100batch: iter_time=3.070, forward_time=0.136, loss_ctc=73.179, loss_att=57.756, acc=0.704, loss=62.383, backward_time=0.766, grad_norm=99.480, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.115, optim0_lr0=7.714e-05, train_time=8.259
+[gpua003:0/64] 2023-07-07 09:35:47,590 (trainer:732) INFO: 23epoch:train:101-200batch: iter_time=9.809e-05, forward_time=0.110, loss_ctc=65.447, loss_att=55.548, acc=0.684, loss=58.518, backward_time=0.757, grad_norm=105.042, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.712e-05, train_time=2.052
+[gpua003:0/64] 2023-07-07 09:37:44,269 (trainer:732) INFO: 23epoch:train:201-300batch: iter_time=2.992e-04, forward_time=0.200, loss_ctc=91.477, loss_att=64.962, acc=0.703, loss=72.917, backward_time=0.765, grad_norm=137.116, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.130, optim0_lr0=7.711e-05, train_time=2.331
+[gpua003:0/64] 2023-07-07 09:39:30,373 (trainer:732) INFO: 23epoch:train:301-400batch: iter_time=2.111e-04, forward_time=0.143, loss_ctc=74.075, loss_att=60.821, acc=0.698, loss=64.797, backward_time=0.764, grad_norm=100.292, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.118, optim0_lr0=7.709e-05, train_time=2.124
+[gpua003:0/64] 2023-07-07 09:41:13,338 (trainer:732) INFO: 23epoch:train:401-500batch: iter_time=9.774e-05, forward_time=0.108, loss_ctc=78.934, loss_att=61.524, acc=0.709, loss=66.747, backward_time=0.755, grad_norm=111.165, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.707e-05, train_time=2.059
+[gpua003:0/64] 2023-07-07 09:42:53,514 (trainer:732) INFO: 23epoch:train:501-600batch: iter_time=9.807e-05, forward_time=0.108, loss_ctc=69.498, loss_att=53.920, acc=0.698, loss=58.594, backward_time=0.752, grad_norm=97.973, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.705e-05, train_time=2.003
+[gpua003:0/64] 2023-07-07 09:44:41,445 (trainer:732) INFO: 23epoch:train:601-700batch: iter_time=8.987e-05, forward_time=0.108, loss_ctc=83.563, loss_att=61.774, acc=0.691, loss=68.311, backward_time=0.762, grad_norm=122.324, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.703e-05, train_time=2.158
+[gpua003:0/64] 2023-07-07 09:46:26,252 (trainer:732) INFO: 23epoch:train:701-800batch: iter_time=9.492e-05, forward_time=0.109, loss_ctc=74.540, loss_att=56.661, acc=0.697, loss=62.025, backward_time=0.755, grad_norm=102.834, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.701e-05, train_time=2.096
+[gpua003:0/64] 2023-07-07 09:47:10,710 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpua003:0/64] 2023-07-07 09:47:30,186 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 09:47:34,091 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 09:47:34,145 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11,
+[gpua003:0/64] 2023-07-07 09:47:34,151 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 09:53:24,106 (trainer:732) INFO: 23epoch:train:801-900batch: iter_time=2.968, forward_time=0.137, loss_ctc=71.883, loss_att=53.675, acc=0.701, loss=59.138, backward_time=0.769, grad_norm=96.823, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.700e-05, train_time=8.356
+[gpua003:0/64] 2023-07-07 09:55:05,103 (trainer:732) INFO: 23epoch:train:901-1000batch: iter_time=1.047e-04, forward_time=0.110, loss_ctc=64.340, loss_att=51.496, acc=0.693, loss=55.349, backward_time=0.754, grad_norm=93.049, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.698e-05, train_time=2.020
+[gpua003:0/64] 2023-07-07 09:56:45,333 (trainer:732) INFO: 23epoch:train:1001-1100batch: iter_time=9.897e-05, forward_time=0.109, loss_ctc=82.590, loss_att=64.984, acc=0.698, loss=70.266, backward_time=0.751, grad_norm=115.985, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.696e-05, train_time=2.004
+[gpua003:0/64] 2023-07-07 09:58:25,170 (trainer:732) INFO: 23epoch:train:1101-1200batch: iter_time=9.800e-05, forward_time=0.109, loss_ctc=74.904, loss_att=57.611, acc=0.704, loss=62.799, backward_time=0.753, grad_norm=93.699, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.694e-05, train_time=1.997
+[gpua003:0/64] 2023-07-07 10:00:05,128 (trainer:732) INFO: 23epoch:train:1201-1300batch: iter_time=1.040e-04, forward_time=0.110, loss_ctc=77.700, loss_att=63.938, acc=0.709, loss=68.066, backward_time=0.753, grad_norm=120.473, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.114, optim0_lr0=7.692e-05, train_time=1.999
+[gpua003:0/64] 2023-07-07 10:01:45,015 (trainer:732) INFO: 23epoch:train:1301-1400batch: iter_time=1.311e-04, forward_time=0.110, loss_ctc=66.958, loss_att=50.399, acc=0.716, loss=55.367, backward_time=0.753, grad_norm=104.203, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.690e-05, train_time=1.998
+[gpua003:0/64] 2023-07-07 10:03:25,092 (trainer:732) INFO: 23epoch:train:1401-1500batch: iter_time=1.131e-04, forward_time=0.111, loss_ctc=79.489, loss_att=60.622, acc=0.688, loss=66.282, backward_time=0.753, grad_norm=110.078, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.689e-05, train_time=2.001
+[gpua003:0/64] 2023-07-07 10:05:04,751 (trainer:732) INFO: 23epoch:train:1501-1600batch: iter_time=9.681e-05, forward_time=0.109, loss_ctc=69.851, loss_att=56.849, acc=0.699, loss=60.749, backward_time=0.751, grad_norm=108.192, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.687e-05, train_time=1.993
+[gpua003:0/64] 2023-07-07 10:06:13,927 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpua003:0/64] 2023-07-07 10:06:33,050 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 10:06:36,687 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 10:06:36,687 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9,
+[gpua003:0/64] 2023-07-07 10:06:36,693 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 10:10:24,633 (trainer:732) INFO: 23epoch:train:1601-1700batch: iter_time=1.378, forward_time=0.109, loss_ctc=70.081, loss_att=51.925, acc=0.700, loss=57.372, backward_time=0.765, grad_norm=91.494, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.685e-05, train_time=6.397
+[gpua003:0/64] 2023-07-07 10:12:05,102 (trainer:732) INFO: 23epoch:train:1701-1800batch: iter_time=1.008e-04, forward_time=0.109, loss_ctc=70.081, loss_att=57.940, acc=0.707, loss=61.582, backward_time=0.756, grad_norm=100.076, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.112, optim0_lr0=7.683e-05, train_time=2.009
+[gpua003:0/64] 2023-07-07 10:13:45,172 (trainer:732) INFO: 23epoch:train:1801-1900batch: iter_time=1.097e-04, forward_time=0.108, loss_ctc=72.888, loss_att=55.204, acc=0.697, loss=60.509, backward_time=0.752, grad_norm=114.953, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.112, optim0_lr0=7.681e-05, train_time=2.001
+[gpua003:0/64] 2023-07-07 10:15:25,082 (trainer:732) INFO: 23epoch:train:1901-2000batch: iter_time=9.785e-05, forward_time=0.109, loss_ctc=83.550, loss_att=66.001, acc=0.695, loss=71.265, backward_time=0.753, grad_norm=99.143, clip=100.000, loss_scale=2.951e+20, optim_step_time=0.113, optim0_lr0=7.680e-05, train_time=1.998
+[gpua003:0/64] 2023-07-07 10:17:04,949 (trainer:732) INFO: 23epoch:train:2001-2100batch: iter_time=9.886e-05, forward_time=0.110, loss_ctc=72.138, loss_att=59.700, acc=0.709, loss=63.431, backward_time=0.752, grad_norm=90.082, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.112, optim0_lr0=7.678e-05, train_time=1.997
+[gpua003:0/64] 2023-07-07 10:18:44,752 (trainer:732) INFO: 23epoch:train:2101-2200batch: iter_time=9.873e-05, forward_time=0.108, loss_ctc=71.072, loss_att=54.294, acc=0.707, loss=59.327, backward_time=0.752, grad_norm=109.276, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.112, optim0_lr0=7.676e-05, train_time=1.996
+[gpua003:0/64] 2023-07-07 10:20:24,385 (trainer:732) INFO: 23epoch:train:2201-2300batch: iter_time=9.689e-05, forward_time=0.107, loss_ctc=78.773, loss_att=58.998, acc=0.697, loss=64.930, backward_time=0.751, grad_norm=108.006, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.112, optim0_lr0=7.674e-05, train_time=1.992
+[gpua003:0/64] 2023-07-07 10:22:04,941 (trainer:732) INFO: 23epoch:train:2301-2400batch: iter_time=9.118e-05, forward_time=0.108, loss_ctc=74.926, loss_att=57.996, acc=0.700, loss=63.075, backward_time=0.752, grad_norm=123.642, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.672e-05, train_time=2.011
+[gpua003:0/64] 2023-07-07 10:23:49,235 (trainer:732) INFO: 23epoch:train:2401-2500batch: iter_time=9.225e-05, forward_time=0.107, loss_ctc=64.708, loss_att=53.380, acc=0.695, loss=56.779, backward_time=0.763, grad_norm=88.049, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.670e-05, train_time=2.086
+[gpua003:0/64] 2023-07-07 10:23:52,704 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpua003:0/64] 2023-07-07 10:24:12,052 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 10:24:15,673 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 10:24:15,673 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5,
+[gpua003:0/64] 2023-07-07 10:24:15,680 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 10:29:09,460 (trainer:732) INFO: 23epoch:train:2501-2600batch: iter_time=1.283, forward_time=0.109, loss_ctc=73.117, loss_att=58.099, acc=0.706, loss=62.605, backward_time=0.766, grad_norm=90.498, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.112, optim0_lr0=7.669e-05, train_time=6.404
+[gpua003:0/64] 2023-07-07 10:30:49,559 (trainer:732) INFO: 23epoch:train:2601-2700batch: iter_time=8.534e-05, forward_time=0.109, loss_ctc=64.315, loss_att=53.246, acc=0.691, loss=56.567, backward_time=0.752, grad_norm=89.794, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.667e-05, train_time=2.002
+[gpua003:0/64] 2023-07-07 10:32:29,624 (trainer:732) INFO: 23epoch:train:2701-2800batch: iter_time=8.962e-05, forward_time=0.109, loss_ctc=85.891, loss_att=63.051, acc=0.705, loss=69.903, backward_time=0.754, grad_norm=94.837, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.665e-05, train_time=2.001
+[gpua003:0/64] 2023-07-07 10:34:09,982 (trainer:732) INFO: 23epoch:train:2801-2900batch: iter_time=9.237e-05, forward_time=0.109, loss_ctc=74.116, loss_att=59.956, acc=0.701, loss=64.204, backward_time=0.753, grad_norm=89.065, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.663e-05, train_time=2.007
+[gpua003:0/64] 2023-07-07 10:35:50,367 (trainer:732) INFO: 23epoch:train:2901-3000batch: iter_time=8.691e-05, forward_time=0.109, loss_ctc=79.211, loss_att=61.574, acc=0.710, loss=66.865, backward_time=0.753, grad_norm=102.506, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.661e-05, train_time=2.007
+[gpua003:0/64] 2023-07-07 10:37:31,411 (trainer:732) INFO: 23epoch:train:3001-3100batch: iter_time=9.464e-05, forward_time=0.109, loss_ctc=65.890, loss_att=51.091, acc=0.706, loss=55.530, backward_time=0.755, grad_norm=86.526, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.660e-05, train_time=2.021
+[gpua003:0/64] 2023-07-07 10:39:12,463 (trainer:732) INFO: 23epoch:train:3101-3200batch: iter_time=1.047e-04, forward_time=0.109, loss_ctc=78.191, loss_att=58.913, acc=0.696, loss=64.697, backward_time=0.758, grad_norm=107.870, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.658e-05, train_time=2.021
+[gpua003:0/64] 2023-07-07 10:40:55,933 (trainer:732) INFO: 23epoch:train:3201-3300batch: iter_time=1.031e-04, forward_time=0.108, loss_ctc=70.556, loss_att=54.398, acc=0.709, loss=59.245, backward_time=0.758, grad_norm=88.177, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.656e-05, train_time=2.069
+[gpua003:0/64] 2023-07-07 10:41:31,292 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpua003:0/64] 2023-07-07 10:41:50,605 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 10:41:54,287 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 10:41:54,287 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7,
+[gpua003:0/64] 2023-07-07 10:41:54,293 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 10:45:45,530 (trainer:732) INFO: 23epoch:train:3301-3400batch: iter_time=1.299, forward_time=0.108, loss_ctc=67.705, loss_att=53.054, acc=0.697, loss=57.449, backward_time=0.769, grad_norm=90.741, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.654e-05, train_time=5.792
+[gpua003:0/64] 2023-07-07 10:47:25,708 (trainer:732) INFO: 23epoch:train:3401-3500batch: iter_time=9.948e-05, forward_time=0.108, loss_ctc=69.513, loss_att=54.398, acc=0.710, loss=58.932, backward_time=0.753, grad_norm=90.557, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.112, optim0_lr0=7.653e-05, train_time=2.003
+[gpua003:0/64] 2023-07-07 10:49:05,710 (trainer:732) INFO: 23epoch:train:3501-3600batch: iter_time=9.595e-05, forward_time=0.109, loss_ctc=71.035, loss_att=55.784, acc=0.705, loss=60.359, backward_time=0.754, grad_norm=89.579, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.112, optim0_lr0=7.651e-05, train_time=2.000
+[gpua003:0/64] 2023-07-07 10:51:01,341 (trainer:732) INFO: 23epoch:train:3601-3700batch: iter_time=9.044e-05, forward_time=0.119, loss_ctc=84.230, loss_att=63.766, acc=0.703, loss=69.905, backward_time=0.776, grad_norm=98.539, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.114, optim0_lr0=7.649e-05, train_time=2.312
+[gpua003:0/64] 2023-07-07 10:52:56,875 (trainer:732) INFO: 23epoch:train:3701-3800batch: iter_time=6.396e-04, forward_time=0.131, loss_ctc=73.263, loss_att=59.816, acc=0.713, loss=63.850, backward_time=0.792, grad_norm=100.049, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.115, optim0_lr0=7.647e-05, train_time=2.310
+[gpua003:0/64] 2023-07-07 10:54:36,698 (trainer:732) INFO: 23epoch:train:3801-3900batch: iter_time=8.669e-05, forward_time=0.110, loss_ctc=66.313, loss_att=49.529, acc=0.713, loss=54.564, backward_time=0.753, grad_norm=92.675, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.645e-05, train_time=1.996
+[gpua003:0/64] 2023-07-07 10:56:17,417 (trainer:732) INFO: 23epoch:train:3901-4000batch: iter_time=9.704e-05, forward_time=0.112, loss_ctc=77.178, loss_att=56.536, acc=0.708, loss=62.729, backward_time=0.754, grad_norm=103.834, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.644e-05, train_time=2.014
+[gpua003:0/64] 2023-07-07 10:57:57,769 (trainer:732) INFO: 23epoch:train:4001-4100batch: iter_time=9.868e-05, forward_time=0.110, loss_ctc=72.979, loss_att=57.846, acc=0.699, loss=62.386, backward_time=0.753, grad_norm=115.524, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.642e-05, train_time=2.007
+[gpua003:0/64] 2023-07-07 10:59:19,759 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpua003:0/64] 2023-07-07 10:59:39,162 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 10:59:42,862 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 10:59:42,862 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10,
+[gpua003:0/64] 2023-07-07 10:59:42,869 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 11:04:06,757 (trainer:732) INFO: 23epoch:train:4101-4200batch: iter_time=2.609, forward_time=0.130, loss_ctc=65.738, loss_att=48.149, acc=0.704, loss=53.426, backward_time=0.768, grad_norm=87.940, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.115, optim0_lr0=7.640e-05, train_time=7.379
+[gpua003:0/64] 2023-07-07 11:05:49,241 (trainer:732) INFO: 23epoch:train:4201-4300batch: iter_time=1.057e-04, forward_time=0.109, loss_ctc=68.915, loss_att=58.433, acc=0.703, loss=61.578, backward_time=0.757, grad_norm=94.575, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.638e-05, train_time=2.050
+[gpua003:0/64] 2023-07-07 11:07:30,444 (trainer:732) INFO: 23epoch:train:4301-4400batch: iter_time=1.040e-04, forward_time=0.111, loss_ctc=71.161, loss_att=54.889, acc=0.690, loss=59.771, backward_time=0.754, grad_norm=100.822, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.114, optim0_lr0=7.636e-05, train_time=2.024
+[gpua003:0/64] 2023-07-07 11:09:10,593 (trainer:732) INFO: 23epoch:train:4401-4500batch: iter_time=1.108e-04, forward_time=0.110, loss_ctc=82.303, loss_att=66.740, acc=0.686, loss=71.409, backward_time=0.755, grad_norm=94.738, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.635e-05, train_time=2.003
+[gpua003:0/64] 2023-07-07 11:10:50,539 (trainer:732) INFO: 23epoch:train:4501-4600batch: iter_time=1.004e-04, forward_time=0.110, loss_ctc=71.919, loss_att=59.048, acc=0.707, loss=62.910, backward_time=0.753, grad_norm=85.604, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.112, optim0_lr0=7.633e-05, train_time=1.999
+[gpua003:0/64] 2023-07-07 11:12:30,296 (trainer:732) INFO: 23epoch:train:4601-4700batch: iter_time=1.109e-04, forward_time=0.110, loss_ctc=70.547, loss_att=55.273, acc=0.703, loss=59.856, backward_time=0.752, grad_norm=92.197, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.113, optim0_lr0=7.631e-05, train_time=1.995
+[gpua003:0/64] 2023-07-07 11:14:10,064 (trainer:732) INFO: 23epoch:train:4701-4800batch: iter_time=9.679e-05, forward_time=0.109, loss_ctc=78.198, loss_att=57.584, acc=0.701, loss=63.768, backward_time=0.754, grad_norm=100.076, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.114, optim0_lr0=7.629e-05, train_time=1.995
+[gpua003:0/64] 2023-07-07 11:15:50,073 (trainer:732) INFO: 23epoch:train:4801-4900batch: iter_time=1.089e-04, forward_time=0.110, loss_ctc=73.676, loss_att=58.368, acc=0.689, loss=62.960, backward_time=0.754, grad_norm=105.898, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.114, optim0_lr0=7.628e-05, train_time=2.000
+[gpua003:0/64] 2023-07-07 11:17:33,946 (trainer:732) INFO: 23epoch:train:4901-5000batch: iter_time=9.428e-05, forward_time=0.109, loss_ctc=64.333, loss_att=54.483, acc=0.690, loss=57.438, backward_time=0.757, grad_norm=94.071, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.114, optim0_lr0=7.626e-05, train_time=2.077
+[gpua003:0/64] 2023-07-07 11:17:39,457 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpua003:0/64] 2023-07-07 11:17:59,004 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpua003:0/64] 2023-07-07 11:18:02,629 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"}
+ preprocess: )
+[gpua003:0/64] 2023-07-07 11:18:02,629 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6,
+[gpua003:0/64] 2023-07-07 11:18:02,635 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpua003:0/64] 2023-07-07 11:23:09,216 (trainer:732) INFO: 23epoch:train:5001-5100batch: iter_time=1.440, forward_time=0.127, loss_ctc=71.836, loss_att=56.120, acc=0.706, loss=60.835, backward_time=0.765, grad_norm=102.820, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.114, optim0_lr0=7.624e-05, train_time=6.705
+[gpua003:0/64] 2023-07-07 11:24:49,430 (trainer:732) INFO: 23epoch:train:5101-5200batch: iter_time=9.982e-05, forward_time=0.108, loss_ctc=63.033, loss_att=52.578, acc=0.690, loss=55.715, backward_time=0.754, grad_norm=77.578, clip=100.000, loss_scale=5.903e+20, optim_step_time=0.112, optim0_lr0=7.622e-05, train_time=2.004
+gpua087:2330954:2332476 [1] NCCL INFO comm 0xbc380f30 rank 53 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpua055:3866105:3867680 [2] NCCL INFO comm 0xa0bacc0 rank 38 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpua087:2330955:2332481 [2] NCCL INFO comm 0x1091ecd0 rank 54 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpua031:1680702:1682220 [2] NCCL INFO comm 0x90042a50 rank 26 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpua055:3866104:3867675 [1] NCCL INFO comm 0x4ff24650 rank 37 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpua028:3269322:3270845 [1] NCCL INFO comm 0x50ff9ba0 rank 17 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpua025:63838:65355 [2] NCCL INFO comm 0xc1f876b0 rank 14 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpua060:2854969:2856486 [1] NCCL INFO comm 0x8c2cb6d0 rank 45 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpua025:63837:65357 [1] NCCL INFO comm 0xa196ac90 rank 13 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpua060:2854970:2856496 [2] NCCL INFO comm 0xb4b68d30 rank 46 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpua003:350635:352158 [2] NCCL INFO comm 0xc165ff50 rank 2 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpua003:350634:352156 [1] NCCL INFO comm 0xb8217e10 rank 1 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpua028:3269323:3270853 [2] NCCL INFO comm 0x4fe1d010 rank 18 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpua053:959076:960598 [2] NCCL INFO comm 0xa5547430 rank 34 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpua090:2294099:2295633 [2] NCCL INFO comm 0x508070c0 rank 58 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpua029:1226922:1228446 [1] NCCL INFO comm 0x91446d0 rank 21 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpua029:1226923:1228448 [2] NCCL INFO comm 0x9682050 rank 22 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpua057:1814426:1815949 [1] NCCL INFO comm 0xb6887810 rank 41 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpua057:1814427:1815959 [2] NCCL INFO comm 0x8ff8bf0 rank 42 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpua035:1685218:1686747 [2] NCCL INFO comm 0x5149e590 rank 30 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpua035:1685217:1686742 [1] NCCL INFO comm 0x94073350 rank 29 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpua098:2101209:2102740 [1] NCCL INFO comm 0xb77452f0 rank 61 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpua005:322786:324303 [1] NCCL INFO comm 0x9e527b50 rank 5 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpua005:322787:324304 [2] NCCL INFO comm 0xa671d450 rank 6 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpua098:2101210:2102744 [2] NCCL INFO comm 0xb13e4b0 rank 62 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpua010:1622002:1623518 [2] NCCL INFO comm 0x95597d0 rank 10 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpua074:989793:991318 [2] NCCL INFO comm 0x50124340 rank 50 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+Process SpawnProcess-2:
+gpua090:2294098:2295630 [1] NCCL INFO comm 0xb9291470 rank 57 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpua031:1680701:1682217 [1] NCCL INFO comm 0xb74170b0 rank 25 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+Traceback (most recent call last):
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+ self.run()
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+ self._target(*self._args, **self._kwargs)
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+ cls.trainer.run(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+ all_steps_are_invalid = cls.train_one_epoch(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch
+ scaler.scale(loss).backward()
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward
+ torch.autograd.backward(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward
+ Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+RuntimeError: [Rank 45] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804271 milliseconds before timing out.
+gpua053:959075:960591 [1] NCCL INFO comm 0x50f9bf70 rank 33 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+Process SpawnProcess-2:
+Process SpawnProcess-2:
+Traceback (most recent call last):
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+ self.run()
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+ self._target(*self._args, **self._kwargs)
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+ cls.trainer.run(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+ all_steps_are_invalid = cls.train_one_epoch(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch
+ scaler.scale(loss).backward()
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward
+ torch.autograd.backward(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward
+ Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+RuntimeError: [Rank 21] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804411 milliseconds before timing out.
+Traceback (most recent call last):
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+ self.run()
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+ self._target(*self._args, **self._kwargs)
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+ cls.trainer.run(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+ all_steps_are_invalid = cls.train_one_epoch(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch
+ scaler.scale(loss).backward()
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward
+ torch.autograd.backward(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward
+ Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+RuntimeError: [Rank 17] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804302 milliseconds before timing out.
+Process SpawnProcess-3:
+Traceback (most recent call last):
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+ self.run()
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+ self._target(*self._args, **self._kwargs)
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+ cls.trainer.run(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+ all_steps_are_invalid = cls.train_one_epoch(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch
+ scaler.scale(loss).backward()
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward
+ torch.autograd.backward(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward
+ Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+RuntimeError: [Rank 14] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804276 milliseconds before timing out.
+Process SpawnProcess-2:
+Traceback (most recent call last):
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+ self.run()
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+ self._target(*self._args, **self._kwargs)
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+ cls.trainer.run(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+ all_steps_are_invalid = cls.train_one_epoch(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch
+ scaler.scale(loss).backward()
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward
+ torch.autograd.backward(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward
+ Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+RuntimeError: [Rank 1] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804307 milliseconds before timing out.
+Process SpawnProcess-2:
+Traceback (most recent call last):
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+ self.run()
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+ self._target(*self._args, **self._kwargs)
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+ cls.trainer.run(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+ all_steps_are_invalid = cls.train_one_epoch(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch
+ scaler.scale(loss).backward()
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward
+ torch.autograd.backward(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward
+ Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+RuntimeError: [Rank 53] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804259 milliseconds before timing out.
+Process SpawnProcess-3:
+Process SpawnProcess-3:
+Traceback (most recent call last):
+Traceback (most recent call last):
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+ self.run()
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+ self._target(*self._args, **self._kwargs)
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+ cls.trainer.run(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+ all_steps_are_invalid = cls.train_one_epoch(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch
+ scaler.scale(loss).backward()
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward
+ torch.autograd.backward(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+ self.run()
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+ self._target(*self._args, **self._kwargs)
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward
+ Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+ cls.trainer.run(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+ all_steps_are_invalid = cls.train_one_epoch(
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch
+ scaler.scale(loss).backward()
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward
+ torch.autograd.backward(
+RuntimeError: [Rank 2] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804368 milliseconds before timing out.
+ File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward
+ Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
+RuntimeError: [Rank 46] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804297 milliseconds before timing out.
+Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 37] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804277 milliseconds before timing out. +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 29] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804484 milliseconds before timing out. 
+Process SpawnProcess-2: +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 13] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804273 milliseconds before timing out. +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 50] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804680 milliseconds before timing out. 
+Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 6] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804607 milliseconds before timing out. +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 30] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804538 milliseconds before timing out. 
+Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 26] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804292 milliseconds before timing out. +gpua010:1622001:1623523 [1] NCCL INFO comm 0x8e6a9490 rank 9 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpua074:989792:991309 [1] NCCL INFO comm 0x91b8e50 rank 49 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 5] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804559 milliseconds before timing out. 
+Process SpawnProcess-2: +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 41] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804434 milliseconds before timing out. +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 42] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804489 milliseconds before timing out. 
+Process SpawnProcess-3: +Traceback (most recent call last): +Process SpawnProcess-3: + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 18] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804353 milliseconds before timing out. +Process SpawnProcess-3: +Traceback (most recent call last): +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) +RuntimeError: [Rank 34] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804440 milliseconds before timing out. 
+ File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 58] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804403 milliseconds before timing out. +Process SpawnProcess-2: +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 61] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804595 milliseconds before timing out. 
+Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 62] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804654 milliseconds before timing out. +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 10] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804678 milliseconds before timing out. 
+Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 22] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804467 milliseconds before timing out. +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 38] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804274 milliseconds before timing out. 
+Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 54] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1804266 milliseconds before timing out. +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 25] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1805539 milliseconds before timing out. 
+Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 33] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1805578 milliseconds before timing out. +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 57] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1805585 milliseconds before timing out. 
+Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 9] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1805928 milliseconds before timing out. +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 630, in train_one_epoch + scaler.scale(loss).backward() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/_tensor.py", line 488, in backward + torch.autograd.backward( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/autograd/__init__.py", line 197, in backward + Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +RuntimeError: [Rank 49] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1805925 milliseconds before timing out. 
+gpua005:322788:324302 [3] NCCL INFO comm 0xb7586590 rank 7 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpua053:959077:960604 [3] NCCL INFO comm 0x8f7ecf20 rank 35 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+RuntimeError: [Rank 7] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1816754 milliseconds before timing out.
+gpua087:2330956:2332486 [3] NCCL INFO comm 0x4fa40250 rank 55 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpua025:63839:65363 [3] NCCL INFO comm 0xc1e534d0 rank 15 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpua029:1226924:1228445 [3] NCCL INFO comm 0x502a1280 rank 23 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
[Each launcher process printed the same ProcessExitedException traceback; only the distinct final lines, remaining RuntimeError lines, and NCCL abort notices are kept below, in order.]
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+RuntimeError: [Rank 55] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1817525 milliseconds before timing out.
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+RuntimeError: [Rank 35] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1817402 milliseconds before timing out.
+gpua098:2101211:2102741 [3] NCCL INFO comm 0xb9e844a0 rank 63 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpua028:3269324:3270856 [3] NCCL INFO comm 0x50758ff0 rank 19 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+RuntimeError: [Rank 15] Caught collective operation timeout: WorkNCCL(SeqNum=9412683, OpType=ALLREDUCE, TensorShape=[1023], Timeout(ms)=1800000) ran for 1817790 milliseconds before timing out.
+gpua074:989794:991315 [3] NCCL INFO comm 0x51823d90 rank 51 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
"/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main + return _run_code(code, main_globals, None, + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code + exec(code, run_globals) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in + main() + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main + S2TTask.main(cmd=cmd) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main + while not ProcessContext(processes, error_queues).join(): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join + raise ProcessExitedException( +torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1 +srun: error: gpua005: task 1: Exited with exit code 1 +srun: error: gpua029: task 5: Exited with exit code 1 +srun: error: gpua025: task 3: Exited with exit code 1 +srun: error: gpua060: task 11: Exited with exit code 1 +srun: error: gpua035: task 7: Exited with exit code 1 +srun: error: gpua003: task 0: Exited with exit code 1 +srun: error: gpua055: task 9: Exited with exit code 1 +srun: error: gpua010: task 2: Exited with exit code 1 +srun: error: gpua087: task 13: Exited with exit code 1 +srun: error: gpua057: task 10: Exited with exit code 1 +srun: error: gpua031: task 6: Exited with exit code 1 +srun: error: gpua090: task 14: Exited with exit code 1 +srun: error: gpua053: task 8: Exited with exit code 1 +srun: error: gpua028: task 4: Exited with exit code 1 +srun: error: gpua098: task 15: Exited with exit code 1 +srun: error: gpua074: task 12: Exited with exit code 1 +# Accounting: begin_time=1688614643 +# Accounting: end_time=1688748923 +# Accounting: time=134280 threads=1 +# Finished at Fri Jul 7 11:55:23 CDT 2023 with status 1 diff --git a/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.8.log b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.8.log new file mode 100644 index 0000000000000000000000000000000000000000..ffd1fa591021c1d02652569656e11521a9eca358 --- /dev/null +++ b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.8.log @@ -0,0 +1,4904 @@ +# Running on gpub015.delta.ncsa.illinois.edu +# Started at Tue Jul 4 13:05:28 CDT 2023 +# SLURMD_NODENAME=gpub015 +# SLURM_CLUSTER_NAME=delta +# SLURM_CONF=/var/spool/slurmd/conf-cache/slurm.conf +# SLURM_CPUS_ON_NODE=64 +# SLURM_CPUS_PER_TASK=64 +# SLURM_EXPORT_ENV=PATH +# SLURM_GET_USER_ENV=1 +# SLURM_GPUS_ON_NODE=4 +# SLURM_GTIDS=0 +# SLURM_JOBID=2127681 +# SLURM_JOB_ACCOUNT=bbjs-delta-gpu +# SLURM_JOB_CPUS_PER_NODE='64(x16)' +# SLURM_JOB_GID=202 +# SLURM_JOB_GPUS=0,1,2,3 +# SLURM_JOB_ID=2127681 +# SLURM_JOB_NAME=exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.log +# SLURM_JOB_NODELIST='gpub[015,026,031-032,036-037,049-053,078-082]' +# SLURM_JOB_NUM_NODES=16 +# SLURM_JOB_PARTITION=gpuA40x4 +# SLURM_JOB_QOS=bbjs-delta-gpu +# SLURM_JOB_UID=68077 +# SLURM_JOB_USER=peng6 +# SLURM_LOCALID=0 +# SLURM_MEM_PER_NODE=240000 +# SLURM_NNODES=16 +# SLURM_NODEID=0 +# SLURM_NODELIST='gpub[015,026,031-032,036-037,049-053,078-082]' +# SLURM_NODE_ALIASES='(null)' +# 
SLURM_OPEN_MODE=a +# SLURM_PRIO_PROCESS=0 +# SLURM_PROCID=0 +# SLURM_SUBMIT_DIR=/scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1 +# SLURM_SUBMIT_HOST=dt-login02.delta.internal.ncsa.edu +# SLURM_TASKS_PER_NODE='1(x16)' +# SLURM_TASK_PID=879691 +# SLURM_TOPOLOGY_ADDR=ss00.ss09.gpub015 +# SLURM_TOPOLOGY_ADDR_PATTERN=switch.switch.node +# SLURM_WORKING_CLUSTER=delta:dt-sched:6817:9728:109 +# srun --export=ALL python3 -m espnet2.bin.s2t_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_c90a07cb-a80d-424b-bc3c-f044f91f1dea +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type 
exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type
exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_c90a07cb-a80d-424b-bc3c-f044f91f1dea +[gpub015:0/64] 2023-07-04 13:08:42,453 (distributed_c10d:319) INFO: Added key: store_based_barrier_key:1 to store for rank: 0 +[gpub015:0/64] 2023-07-04 13:08:43,721 (distributed_c10d:353) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 64 nodes. +[gpub015:0/64] 2023-07-04 13:08:43,750 (s2t:483) INFO: Vocabulary size: 50002 +[gpub015:0/64] 2023-07-04 13:08:58,245 (abs_task:1201) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True +[gpub015:0/64] 2023-07-04 13:08:58,254 (abs_task:1202) INFO: Model structure: +ESPnetS2TModel( + (frontend): DefaultFrontend( + (stft): Stft(n_fft=512, win_length=400, hop_length=160, center=True, normalized=False, onesided=True) + (frontend): Frontend() + (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False) + ) + (specaug): SpecAug( + (freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq) + (time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=10, axis=time) + ) + (normalize): GlobalMVN(stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz, norm_means=True, norm_vars=True) + (encoder): TransformerEncoder( + (embed): Conv2dSubsampling( + (conv): Sequential( + (0): Conv2d(1, 1024, kernel_size=(3, 3), stride=(2, 2)) + (1): ReLU() + (2): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(2, 2)) + (3): ReLU() + ) + (out): Sequential( + (0): Linear(in_features=19456, out_features=1024, bias=True) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + ) + (encoders): MultiSequential( + (0): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, 
out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): 
Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (9): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (10): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, 
out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (11): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (12): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (13): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (14): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): 
Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (15): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (16): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (17): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (18): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): 
Dropout(p=0.1, inplace=False) + ) + (19): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (20): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (21): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (22): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (23): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): 
Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + ) + (decoder): TransformerDecoder( + (embed): Sequential( + (0): Embedding(50002, 1024) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (output_layer): Linear(in_features=1024, out_features=50002, bias=True) + (decoders): MultiSequential( + (0): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): 
Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + 
(5): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, 
elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (9): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (10): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, 
inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (11): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (12): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (13): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): 
Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (14): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (15): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (16): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): 
Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (17): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (18): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (19): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): 
Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (20): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (21): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (22): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, 
inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (23): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+    )
+  )
+  (criterion_att): LabelSmoothingLoss(
+    (criterion): KLDivLoss()
+  )
+  (ctc): CTC(
+    (ctc_lo): Linear(in_features=1024, out_features=50002, bias=True)
+    (ctc_loss): CTCLoss()
+  )
+)
+
+Model summary:
+    Class Name: ESPnetS2TModel
+    Total Number of model parameters: 888.51 M
+    Number of trainable parameters: 888.51 M (100.0%)
+    Size: 3.55 GB
+    Type: torch.float32
+[gpub015:0/64] 2023-07-04 13:08:58,254 (abs_task:1205) INFO: Optimizer:
+AdamW (
+Parameter Group 0
+    amsgrad: False
+    betas: [0.9, 0.98]
+    capturable: False
+    eps: 1e-06
+    foreach: None
+    initial_lr: 0.00025
+    lr: 2.5e-08
+    maximize: False
+    weight_decay: 0.0
+)
+[gpub015:0/64] 2023-07-04 13:08:58,254 (abs_task:1206) INFO: Scheduler: WarmupLR(warmup_steps=10000)
+[gpub015:0/64] 2023-07-04 13:08:58,256 (abs_task:1215) INFO: Saving the configuration in exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/config.yaml
+[gpub015:0/64] 2023-07-04 13:08:58,944 (abs_task:1272) INFO: Loading pretrained params from /scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v2/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e18_d18_lr5e-4_warmup20k_raw_bpe50000/valid.acc.ave.pth
+[gpub015:0/64] 2023-07-04 13:09:08,464 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
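The optimizer state printed above is internally consistent: 888.51 M float32 parameters occupy 888.51e6 x 4 bytes, or about 3.55 GB, and under WarmupLR(warmup_steps=10000) the logged lr of 2.5e-08 is exactly what the schedule yields at step 1 (the optimizer is printed before the checkpoint's scheduler state is restored). A minimal sketch, assuming the Noam-style rule documented for espnet2.schedulers.warmup_lr.WarmupLR:

    # Assumed WarmupLR rule:
    #   lr = base_lr * warmup_steps**0.5 * min(step**-0.5, step * warmup_steps**-1.5)
    def warmup_lr(step: int, base_lr: float = 2.5e-4, warmup_steps: int = 10000) -> float:
        return base_lr * warmup_steps**0.5 * min(step**-0.5, step * warmup_steps**-1.5)

    print(warmup_lr(1))      # 2.5e-08  -> the "lr" printed above (step 1)
    print(warmup_lr(10000))  # 0.00025  -> peak equals initial_lr at the end of warmup
    print(warmup_lr(40000))  # 0.000125 -> decays as step**-0.5 afterwards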
"dump/raw/dev/wav.scp", "type": "kaldi_ark"} + text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"} + text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"} + text: {"path": "dump/raw/dev/text", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-04 13:09:08,613 (abs_task:1571) INFO: [valid] Batch sampler: UnsortedBatchSampler(N-batch=1012, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape, +[gpub015:0/64] 2023-07-04 13:09:08,619 (abs_task:1572) INFO: [valid] mini-batch sizes summary: N-batch=1012, mean=128.1, min=128, max=129 +[gpub015:0/64] 2023-07-04 13:09:09,108 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-04 13:09:09,424 (abs_task:1570) INFO: [plot_att] dataset: +ESPnetDataset( + speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"} + text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"} + text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"} + text: {"path": "dump/raw/dev/text", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-04 13:09:09,424 (abs_task:1571) INFO: [plot_att] Batch sampler: UnsortedBatchSampler(N-batch=129591, batch_size=1, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape, +[gpub015:0/64] 2023-07-04 13:09:09,424 (abs_task:1572) INFO: [plot_att] mini-batch sizes summary: N-batch=3, mean=1.0, min=1, max=1 +[gpub015:0/64] 2023-07-04 13:09:37,908 (trainer:159) INFO: The training was resumed using exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/checkpoint.pth +gpub015:879780:879780 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.115<0> +gpub015:879780:879780 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub015:879780:879780 [0] NCCL INFO cudaDriverVersion 12010 +NCCL version 2.14.3+cuda11.7 +[gpub015:0/64] 2023-07-04 13:09:43,209 (trainer:284) INFO: 11/100epoch started +[gpub015:0/64] 2023-07-04 13:09:43,268 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub015:0/64] 2023-07-04 13:10:00,590 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub015:0/64] 2023-07-04 13:10:03,964 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"}
+  preprocess: )
+[gpub015:0/64] 2023-07-04 13:10:03,964 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1,
+[gpub015:0/64] 2023-07-04 13:10:03,970 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+gpub032:3289606:3289606 [3] NCCL INFO cudaDriverVersion 12010
+gpub032:3289606:3289606 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.132<0>
+gpub032:3289606:3289606 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub032:3289606:3289687 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.132<0>
+gpub032:3289606:3289687 [3] NCCL INFO Using network IB
+gpub032:3289606:3289687 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub032:3289606:3289687 [3] NCCL INFO Trees [0] -1/-1/-1->15->14 [1] -1/-1/-1->15->14
+gpub032:3289606:3289687 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0
+gpub032:3289606:3289687 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0
+gpub032:3289606:3289687 [3] NCCL INFO Connected all rings
+gpub032:3289606:3289687 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 14[85000] via P2P/IPC
+gpub032:3289606:3289687 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 14[85000] via P2P/IPC
+gpub032:3289606:3289687 [3] NCCL INFO Connected all trees
+gpub032:3289606:3289687 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub032:3289606:3289687 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub032:3289606:3289687 [3] NCCL INFO comm 0x501cec20 rank 15 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub015:879783:879783 [3] NCCL INFO cudaDriverVersion 12010
+gpub015:879783:879783 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.115<0>
+gpub015:879783:879783 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub015:879783:879851 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.115<0>
+gpub015:879783:879851 [3] NCCL INFO Using network IB
+gpub015:879783:879851 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub015:879783:879851 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2
+gpub015:879783:879851 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0
+gpub015:879783:879851 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0
+gpub015:879783:879851 [3] NCCL INFO Connected all rings
+gpub015:879783:879851 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 2[85000] via P2P/IPC
+gpub015:879783:879851 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 2[85000] via P2P/IPC
+gpub015:879783:879851 [3] NCCL INFO Connected all trees
+gpub051:2913626:2913626 [3] NCCL INFO cudaDriverVersion 12010
+gpub051:2913626:2913626 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.151<0>
+gpub051:2913626:2913626 [3] NCCL INFO NET/Plugin : No
plugin found (libnccl-net.so), using internal implementation +gpub051:2913626:2913705 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.151<0> +gpub051:2913626:2913705 [3] NCCL INFO Using network IB +gpub051:2913626:2913705 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub051:2913626:2913705 [3] NCCL INFO Trees [0] -1/-1/-1->35->34 [1] -1/-1/-1->35->34 +gpub051:2913626:2913705 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0 +gpub051:2913626:2913705 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0 +gpub051:2913626:2913705 [3] NCCL INFO Connected all rings +gpub051:2913626:2913705 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 34[85000] via P2P/IPC +gpub051:2913626:2913705 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 34[85000] via P2P/IPC +gpub015:879783:879851 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub015:879783:879851 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub015:879783:879851 [3] NCCL INFO comm 0x5071eb50 rank 3 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub051:2913626:2913705 [3] NCCL INFO Connected all trees +gpub051:2913626:2913705 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub051:2913626:2913705 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub051:2913626:2913705 [3] NCCL INFO comm 0x9e42a10 rank 35 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub051:2913625:2913625 [2] NCCL INFO cudaDriverVersion 12010 +gpub051:2913625:2913625 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.151<0> +gpub051:2913625:2913625 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub051:2913625:2913708 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.151<0> +gpub051:2913625:2913708 [2] NCCL INFO Using network IB +gpub051:2913625:2913708 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub051:2913625:2913708 [2] NCCL INFO Trees [0] 35/-1/-1->34->33 [1] 35/-1/-1->34->33 +gpub051:2913625:2913708 [2] NCCL INFO Channel 00/0 : 34[85000] -> 35[c7000] via P2P/IPC +gpub051:2913625:2913708 [2] NCCL INFO Channel 01/0 : 34[85000] -> 35[c7000] via P2P/IPC +gpub051:2913625:2913708 [2] NCCL INFO Connected all rings +gpub051:2913625:2913708 [2] NCCL INFO Channel 00/0 : 34[85000] -> 33[46000] via P2P/IPC +gpub051:2913625:2913708 [2] NCCL INFO Channel 01/0 : 34[85000] -> 33[46000] via P2P/IPC +gpub051:2913625:2913708 [2] NCCL INFO Connected all trees +gpub051:2913625:2913708 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub051:2913625:2913708 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub051:2913625:2913708 [2] NCCL INFO comm 0xb9b5ccd0 rank 34 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub080:4113204:4113204 [1] NCCL INFO cudaDriverVersion 12010 +gpub080:4113204:4113204 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.180<0> +gpub080:4113204:4113204 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub080:4113204:4113287 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.180<0> +gpub080:4113204:4113287 [1] NCCL INFO Using network IB +gpub080:4113204:4113287 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub080:4113204:4113287 [1] NCCL INFO Trees [0] 54/-1/-1->53->52 [1] 54/56/-1->53->52 +gpub080:4113204:4113287 [1] NCCL INFO Channel 00/0 : 53[46000] -> 54[85000] via P2P/IPC +gpub080:4113204:4113287 [1] NCCL INFO Channel 01/0 : 53[46000] -> 54[85000] via 
P2P/IPC +gpub080:4113204:4113287 [1] NCCL INFO Connected all rings +gpub080:4113204:4113287 [1] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [send] via NET/IB/0 +gpub080:4113204:4113287 [1] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [receive] via NET/IB/0 +gpub080:4113204:4113287 [1] NCCL INFO Channel 00/0 : 53[46000] -> 52[7000] via P2P/IPC +gpub080:4113204:4113287 [1] NCCL INFO Channel 01/0 : 53[46000] -> 52[7000] via P2P/IPC +gpub080:4113204:4113287 [1] NCCL INFO Connected all trees +gpub080:4113204:4113287 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub080:4113204:4113287 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub080:4113204:4113287 [1] NCCL INFO comm 0xb71b4bf0 rank 53 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub079:2657933:2657933 [1] NCCL INFO cudaDriverVersion 12010 +gpub079:2657933:2657933 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.179<0> +gpub079:2657933:2657933 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub079:2657933:2658006 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.179<0> +gpub079:2657933:2658006 [1] NCCL INFO Using network IB +gpub079:2657933:2658006 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub079:2657933:2658006 [1] NCCL INFO Trees [0] 50/40/-1->49->48 [1] 50/-1/-1->49->48 +gpub079:2657933:2658006 [1] NCCL INFO Channel 00/0 : 49[46000] -> 50[85000] via P2P/IPC +gpub079:2657933:2658006 [1] NCCL INFO Channel 01/0 : 49[46000] -> 50[85000] via P2P/IPC +gpub079:2657933:2658006 [1] NCCL INFO Connected all rings +gpub079:2657933:2658006 [1] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [receive] via NET/IB/0 +gpub079:2657933:2658006 [1] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [send] via NET/IB/0 +gpub079:2657933:2658006 [1] NCCL INFO Channel 00/0 : 49[46000] -> 48[7000] via P2P/IPC +gpub079:2657933:2658006 [1] NCCL INFO Channel 01/0 : 49[46000] -> 48[7000] via P2P/IPC +gpub079:2657933:2658006 [1] NCCL INFO Connected all trees +gpub079:2657933:2658006 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub079:2657933:2658006 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub079:2657933:2658006 [1] NCCL INFO comm 0x8f776d0 rank 49 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub052:1901667:1901667 [0] NCCL INFO cudaDriverVersion 12010 +gpub052:1901667:1901667 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.152<0> +gpub052:1901667:1901667 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub052:1901667:1901752 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.152<0> +gpub052:1901667:1901752 [0] NCCL INFO Using network IB +gpub052:1901667:1901752 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub052:1901667:1901752 [0] NCCL INFO Trees [0] 37/-1/-1->36->41 [1] 37/32/-1->36->44 +gpub052:1901667:1901752 [0] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0 +gpub052:1901667:1901752 [0] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0 +gpub052:1901667:1901752 [0] NCCL INFO Channel 00/0 : 36[7000] -> 37[46000] via P2P/IPC +gpub052:1901667:1901752 [0] NCCL INFO Channel 01/0 : 36[7000] -> 37[46000] via P2P/IPC +gpub052:1901667:1901752 [0] NCCL INFO Connected all rings +gpub052:1901667:1901752 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [receive] via NET/IB/0 +gpub052:1901667:1901752 [0] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [send] via NET/IB/0 
+gpub052:1901667:1901752 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [send] via NET/IB/0 +gpub052:1901667:1901752 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [receive] via NET/IB/0 +gpub052:1901667:1901752 [0] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [receive] via NET/IB/0 +gpub052:1901667:1901752 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [send] via NET/IB/0 +gpub052:1901667:1901752 [0] NCCL INFO Connected all trees +gpub052:1901667:1901752 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub052:1901667:1901752 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub052:1901667:1901752 [0] NCCL INFO comm 0xbc2124a0 rank 36 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub031:1921204:1921204 [0] NCCL INFO cudaDriverVersion 12010 +gpub031:1921204:1921204 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.131<0> +gpub031:1921204:1921204 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub031:1921204:1921285 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.131<0> +gpub031:1921204:1921285 [0] NCCL INFO Using network IB +gpub031:1921204:1921285 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub031:1921204:1921285 [0] NCCL INFO Trees [0] 9/12/-1->8->17 [1] 9/-1/-1->8->5 +gpub031:1921204:1921285 [0] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0 +gpub031:1921204:1921285 [0] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0 +gpub031:1921204:1921285 [0] NCCL INFO Channel 00/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub031:1921204:1921285 [0] NCCL INFO Channel 01/0 : 8[7000] -> 9[46000] via P2P/IPC +gpub031:1921204:1921285 [0] NCCL INFO Connected all rings +gpub031:1921204:1921285 [0] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [receive] via NET/IB/0 +gpub031:1921204:1921285 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [send] via NET/IB/0 +gpub031:1921204:1921285 [0] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [send] via NET/IB/0 +gpub031:1921204:1921285 [0] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [receive] via NET/IB/0 +gpub031:1921204:1921285 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [receive] via NET/IB/0 +gpub031:1921204:1921285 [0] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [send] via NET/IB/0 +gpub031:1921204:1921285 [0] NCCL INFO Connected all trees +gpub031:1921204:1921285 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub031:1921204:1921285 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub031:1921204:1921285 [0] NCCL INFO comm 0xb63f1750 rank 8 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub031:1921207:1921207 [3] NCCL INFO cudaDriverVersion 12010 +gpub031:1921207:1921207 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.131<0> +gpub031:1921207:1921207 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub031:1921207:1921286 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.131<0> +gpub031:1921207:1921286 [3] NCCL INFO Using network IB +gpub031:1921207:1921286 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub031:1921207:1921286 [3] NCCL INFO Trees [0] -1/-1/-1->11->10 [1] -1/-1/-1->11->10 +gpub031:1921207:1921286 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0 +gpub031:1921207:1921286 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0 +gpub031:1921207:1921286 [3] NCCL INFO Connected all rings +gpub031:1921207:1921286 [3] NCCL INFO Channel 00/0 
: 11[c7000] -> 10[85000] via P2P/IPC +gpub031:1921207:1921286 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 10[85000] via P2P/IPC +gpub031:1921207:1921286 [3] NCCL INFO Connected all trees +gpub031:1921207:1921286 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub031:1921207:1921286 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub031:1921207:1921286 [3] NCCL INFO comm 0x9451c60 rank 11 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub052:1901668:1901668 [1] NCCL INFO cudaDriverVersion 12010 +gpub052:1901668:1901668 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.152<0> +gpub052:1901668:1901668 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub052:1901668:1901749 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.152<0> +gpub052:1901668:1901749 [1] NCCL INFO Using network IB +gpub052:1901668:1901749 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub052:1901668:1901749 [1] NCCL INFO Trees [0] 38/-1/-1->37->36 [1] 38/40/-1->37->36 +gpub052:1901668:1901749 [1] NCCL INFO Channel 00/0 : 37[46000] -> 38[85000] via P2P/IPC +gpub052:1901668:1901749 [1] NCCL INFO Channel 01/0 : 37[46000] -> 38[85000] via P2P/IPC +gpub052:1901668:1901749 [1] NCCL INFO Connected all rings +gpub052:1901668:1901749 [1] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [send] via NET/IB/0 +gpub052:1901668:1901749 [1] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [receive] via NET/IB/0 +gpub052:1901668:1901749 [1] NCCL INFO Channel 00/0 : 37[46000] -> 36[7000] via P2P/IPC +gpub052:1901668:1901749 [1] NCCL INFO Channel 01/0 : 37[46000] -> 36[7000] via P2P/IPC +gpub052:1901668:1901749 [1] NCCL INFO Connected all trees +gpub052:1901668:1901749 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub052:1901668:1901749 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub052:1901668:1901749 [1] NCCL INFO comm 0x50134230 rank 37 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub080:4113203:4113203 [0] NCCL INFO cudaDriverVersion 12010 +gpub080:4113203:4113203 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.180<0> +gpub080:4113203:4113203 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub080:4113203:4113290 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.180<0> +gpub080:4113203:4113290 [0] NCCL INFO Using network IB +gpub080:4113203:4113290 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub080:4113203:4113290 [0] NCCL INFO Trees [0] 53/-1/-1->52->57 [1] 53/48/-1->52->45 +gpub080:4113203:4113290 [0] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0 +gpub080:4113203:4113290 [0] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0 +gpub080:4113203:4113290 [0] NCCL INFO Channel 00/0 : 52[7000] -> 53[46000] via P2P/IPC +gpub080:4113203:4113290 [0] NCCL INFO Channel 01/0 : 52[7000] -> 53[46000] via P2P/IPC +gpub080:4113203:4113290 [0] NCCL INFO Connected all rings +gpub080:4113203:4113290 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [receive] via NET/IB/0 +gpub080:4113203:4113290 [0] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [send] via NET/IB/0 +gpub080:4113203:4113290 [0] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [receive] via NET/IB/0 +gpub080:4113203:4113290 [0] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [send] via NET/IB/0 +gpub080:4113203:4113290 [0] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [receive] via NET/IB/0 
+gpub080:4113203:4113290 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [send] via NET/IB/0 +gpub080:4113203:4113290 [0] NCCL INFO Connected all trees +gpub080:4113203:4113290 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub080:4113203:4113290 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub080:4113203:4113290 [0] NCCL INFO comm 0xa21d7f0 rank 52 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub079:2657935:2657935 [3] NCCL INFO cudaDriverVersion 12010 +gpub079:2657935:2657935 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.179<0> +gpub079:2657935:2657935 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub079:2657935:2658008 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.179<0> +gpub079:2657935:2658008 [3] NCCL INFO Using network IB +gpub079:2657935:2658008 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub079:2657935:2658008 [3] NCCL INFO Trees [0] -1/-1/-1->51->50 [1] -1/-1/-1->51->50 +gpub079:2657935:2658008 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0 +gpub079:2657935:2658008 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0 +gpub079:2657935:2658008 [3] NCCL INFO Connected all rings +gpub079:2657935:2658008 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 50[85000] via P2P/IPC +gpub079:2657935:2658008 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 50[85000] via P2P/IPC +gpub079:2657935:2658008 [3] NCCL INFO Connected all trees +gpub079:2657935:2658008 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub079:2657935:2658008 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub079:2657935:2658008 [3] NCCL INFO comm 0x4edd83d0 rank 51 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub032:3289605:3289605 [2] NCCL INFO cudaDriverVersion 12010 +gpub032:3289605:3289605 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.132<0> +gpub032:3289605:3289605 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub032:3289605:3289686 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.132<0> +gpub032:3289605:3289686 [2] NCCL INFO Using network IB +gpub032:3289605:3289686 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub032:3289605:3289686 [2] NCCL INFO Trees [0] 15/-1/-1->14->13 [1] 15/-1/-1->14->13 +gpub032:3289605:3289686 [2] NCCL INFO Channel 00/0 : 14[85000] -> 15[c7000] via P2P/IPC +gpub032:3289605:3289686 [2] NCCL INFO Channel 01/0 : 14[85000] -> 15[c7000] via P2P/IPC +gpub032:3289605:3289686 [2] NCCL INFO Connected all rings +gpub032:3289605:3289686 [2] NCCL INFO Channel 00/0 : 14[85000] -> 13[46000] via P2P/IPC +gpub032:3289605:3289686 [2] NCCL INFO Channel 01/0 : 14[85000] -> 13[46000] via P2P/IPC +gpub032:3289605:3289686 [2] NCCL INFO Connected all trees +gpub032:3289605:3289686 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub032:3289605:3289686 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub032:3289605:3289686 [2] NCCL INFO comm 0xb6f8bc90 rank 14 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub032:3289604:3289604 [1] NCCL INFO cudaDriverVersion 12010 +gpub032:3289604:3289604 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.132<0> +gpub032:3289604:3289604 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub032:3289604:3289685 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.132<0> +gpub032:3289604:3289685 [1] NCCL INFO 
Using network IB +gpub032:3289604:3289685 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub032:3289604:3289685 [1] NCCL INFO Trees [0] 14/-1/-1->13->12 [1] 14/20/-1->13->12 +gpub032:3289604:3289685 [1] NCCL INFO Channel 00/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub032:3289604:3289685 [1] NCCL INFO Channel 01/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub032:3289604:3289685 [1] NCCL INFO Connected all rings +gpub032:3289604:3289685 [1] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [send] via NET/IB/0 +gpub032:3289604:3289685 [1] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [receive] via NET/IB/0 +gpub032:3289604:3289685 [1] NCCL INFO Channel 00/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub032:3289604:3289685 [1] NCCL INFO Channel 01/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub032:3289604:3289685 [1] NCCL INFO Connected all trees +gpub032:3289604:3289685 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub032:3289604:3289685 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub032:3289604:3289685 [1] NCCL INFO comm 0x50c34690 rank 13 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub079:2657932:2657932 [0] NCCL INFO cudaDriverVersion 12010 +gpub079:2657932:2657932 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.179<0> +gpub079:2657932:2657932 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub079:2657932:2658009 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.179<0> +gpub079:2657932:2658009 [0] NCCL INFO Using network IB +gpub079:2657932:2658009 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub079:2657932:2658009 [0] NCCL INFO Trees [0] 49/56/-1->48->32 [1] 49/-1/-1->48->52 +gpub079:2657932:2658009 [0] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0 +gpub079:2657932:2658009 [0] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0 +gpub079:2657932:2658009 [0] NCCL INFO Channel 00/0 : 48[7000] -> 49[46000] via P2P/IPC +gpub079:2657932:2658009 [0] NCCL INFO Channel 01/0 : 48[7000] -> 49[46000] via P2P/IPC +gpub079:2657932:2658009 [0] NCCL INFO Connected all rings +gpub079:2657932:2658009 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [send] via NET/IB/0 +gpub079:2657932:2658009 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [send] via NET/IB/0 +gpub079:2657932:2658009 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [receive] via NET/IB/0 +gpub079:2657932:2658009 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [send] via NET/IB/0 +gpub079:2657932:2658009 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [receive] via NET/IB/0 +gpub079:2657932:2658009 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [receive] via NET/IB/0 +gpub079:2657932:2658009 [0] NCCL INFO Connected all trees +gpub079:2657932:2658009 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub079:2657932:2658009 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub079:2657932:2658009 [0] NCCL INFO comm 0x8c890be0 rank 48 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub031:1921205:1921205 [1] NCCL INFO cudaDriverVersion 12010 +gpub031:1921205:1921205 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.131<0> +gpub031:1921205:1921205 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub031:1921205:1921287 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.131<0> +gpub031:1921205:1921287 [1] NCCL INFO Using network IB +gpub031:1921205:1921287 [1] NCCL 
INFO Setting affinity for GPU 1 to ffff,00000000 +gpub031:1921205:1921287 [1] NCCL INFO Trees [0] 10/4/-1->9->8 [1] 10/-1/-1->9->8 +gpub031:1921205:1921287 [1] NCCL INFO Channel 00/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub031:1921205:1921287 [1] NCCL INFO Channel 01/0 : 9[46000] -> 10[85000] via P2P/IPC +gpub031:1921205:1921287 [1] NCCL INFO Connected all rings +gpub031:1921205:1921287 [1] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [receive] via NET/IB/0 +gpub031:1921205:1921287 [1] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [send] via NET/IB/0 +gpub031:1921205:1921287 [1] NCCL INFO Channel 00/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub031:1921205:1921287 [1] NCCL INFO Channel 01/0 : 9[46000] -> 8[7000] via P2P/IPC +gpub031:1921205:1921287 [1] NCCL INFO Connected all trees +gpub031:1921205:1921287 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub031:1921205:1921287 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub031:1921205:1921287 [1] NCCL INFO comm 0x92a3a80 rank 9 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub051:2913623:2913623 [0] NCCL INFO cudaDriverVersion 12010 +gpub051:2913623:2913623 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.151<0> +gpub051:2913623:2913623 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub051:2913623:2913706 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.151<0> +gpub051:2913623:2913706 [0] NCCL INFO Using network IB +gpub051:2913623:2913706 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub051:2913623:2913706 [0] NCCL INFO Trees [0] 33/48/-1->32->0 [1] 33/-1/-1->32->36 +gpub051:2913623:2913706 [0] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0 +gpub051:2913623:2913706 [0] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0 +gpub051:2913623:2913706 [0] NCCL INFO Channel 00/0 : 32[7000] -> 33[46000] via P2P/IPC +gpub051:2913623:2913706 [0] NCCL INFO Channel 01/0 : 32[7000] -> 33[46000] via P2P/IPC +gpub051:2913623:2913706 [0] NCCL INFO Connected all rings +gpub051:2913623:2913706 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [send] via NET/IB/0 +gpub051:2913623:2913706 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [send] via NET/IB/0 +gpub051:2913623:2913706 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [receive] via NET/IB/0 +gpub051:2913623:2913706 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [send] via NET/IB/0 +gpub051:2913623:2913706 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [receive] via NET/IB/0 +gpub051:2913623:2913706 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [receive] via NET/IB/0 +gpub051:2913623:2913706 [0] NCCL INFO Connected all trees +gpub051:2913623:2913706 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub051:2913623:2913706 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub051:2913623:2913706 [0] NCCL INFO comm 0x8dc3e980 rank 32 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub050:1879226:1879226 [1] NCCL INFO cudaDriverVersion 12010 +gpub050:1879226:1879226 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.150<0> +gpub050:1879226:1879226 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub050:1879226:1879305 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.150<0> +gpub050:1879226:1879305 [1] NCCL INFO Using network IB +gpub050:1879226:1879305 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub050:1879226:1879305 
[1] NCCL INFO Trees [0] 30/-1/-1->29->28 [1] 30/44/-1->29->28 +gpub050:1879226:1879305 [1] NCCL INFO Channel 00/0 : 29[46000] -> 30[85000] via P2P/IPC +gpub050:1879226:1879305 [1] NCCL INFO Channel 01/0 : 29[46000] -> 30[85000] via P2P/IPC +gpub050:1879226:1879305 [1] NCCL INFO Connected all rings +gpub050:1879226:1879305 [1] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [send] via NET/IB/0 +gpub050:1879226:1879305 [1] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [receive] via NET/IB/0 +gpub080:4113206:4113206 [3] NCCL INFO cudaDriverVersion 12010 +gpub080:4113206:4113206 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.180<0> +gpub080:4113206:4113206 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub080:4113206:4113289 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.180<0> +gpub080:4113206:4113289 [3] NCCL INFO Using network IB +gpub080:4113206:4113289 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub080:4113206:4113289 [3] NCCL INFO Trees [0] -1/-1/-1->55->54 [1] -1/-1/-1->55->54 +gpub080:4113206:4113289 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0 +gpub080:4113206:4113289 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0 +gpub080:4113206:4113289 [3] NCCL INFO Connected all rings +gpub080:4113206:4113289 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 54[85000] via P2P/IPC +gpub080:4113206:4113289 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 54[85000] via P2P/IPC +gpub050:1879226:1879305 [1] NCCL INFO Channel 00/0 : 29[46000] -> 28[7000] via P2P/IPC +gpub050:1879226:1879305 [1] NCCL INFO Channel 01/0 : 29[46000] -> 28[7000] via P2P/IPC +gpub050:1879226:1879305 [1] NCCL INFO Connected all trees +gpub050:1879226:1879305 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub050:1879226:1879305 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub050:1879226:1879305 [1] NCCL INFO comm 0x50792660 rank 29 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub080:4113206:4113289 [3] NCCL INFO Connected all trees +gpub080:4113206:4113289 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub080:4113206:4113289 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub080:4113206:4113289 [3] NCCL INFO comm 0x8c72c2a0 rank 55 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub050:1879227:1879227 [2] NCCL INFO cudaDriverVersion 12010 +gpub050:1879227:1879227 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.150<0> +gpub050:1879227:1879227 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub050:1879227:1879304 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.150<0> +gpub050:1879227:1879304 [2] NCCL INFO Using network IB +gpub050:1879227:1879304 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub050:1879227:1879304 [2] NCCL INFO Trees [0] 31/-1/-1->30->29 [1] 31/-1/-1->30->29 +gpub050:1879227:1879304 [2] NCCL INFO Channel 00/0 : 30[85000] -> 31[c7000] via P2P/IPC +gpub050:1879227:1879304 [2] NCCL INFO Channel 01/0 : 30[85000] -> 31[c7000] via P2P/IPC +gpub050:1879227:1879304 [2] NCCL INFO Connected all rings +gpub050:1879227:1879304 [2] NCCL INFO Channel 00/0 : 30[85000] -> 29[46000] via P2P/IPC +gpub050:1879227:1879304 [2] NCCL INFO Channel 01/0 : 30[85000] -> 29[46000] via P2P/IPC +gpub049:4064877:4064877 [3] NCCL INFO cudaDriverVersion 12010 +gpub049:4064877:4064877 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.149<0> +gpub049:4064877:4064877 [3] NCCL 
INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub049:4064877:4064941 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.149<0> +gpub049:4064877:4064941 [3] NCCL INFO Using network IB +gpub049:4064877:4064941 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub049:4064877:4064941 [3] NCCL INFO Trees [0] -1/-1/-1->27->26 [1] -1/-1/-1->27->26 +gpub049:4064877:4064941 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0 +gpub049:4064877:4064941 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0 +gpub049:4064877:4064941 [3] NCCL INFO Connected all rings +gpub049:4064877:4064941 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 26[85000] via P2P/IPC +gpub049:4064877:4064941 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 26[85000] via P2P/IPC +gpub050:1879227:1879304 [2] NCCL INFO Connected all trees +gpub050:1879227:1879304 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub050:1879227:1879304 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub050:1879227:1879304 [2] NCCL INFO comm 0x50baa200 rank 30 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub049:4064877:4064941 [3] NCCL INFO Connected all trees +gpub049:4064877:4064941 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub049:4064877:4064941 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub049:4064877:4064941 [3] NCCL INFO comm 0x4f5c00a0 rank 27 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub079:2657934:2657934 [2] NCCL INFO cudaDriverVersion 12010 +gpub079:2657934:2657934 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.179<0> +gpub079:2657934:2657934 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub079:2657934:2658007 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.179<0> +gpub079:2657934:2658007 [2] NCCL INFO Using network IB +gpub079:2657934:2658007 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub079:2657934:2658007 [2] NCCL INFO Trees [0] 51/-1/-1->50->49 [1] 51/-1/-1->50->49 +gpub079:2657934:2658007 [2] NCCL INFO Channel 00/0 : 50[85000] -> 51[c7000] via P2P/IPC +gpub079:2657934:2658007 [2] NCCL INFO Channel 01/0 : 50[85000] -> 51[c7000] via P2P/IPC +gpub079:2657934:2658007 [2] NCCL INFO Connected all rings +gpub079:2657934:2658007 [2] NCCL INFO Channel 00/0 : 50[85000] -> 49[46000] via P2P/IPC +gpub079:2657934:2658007 [2] NCCL INFO Channel 01/0 : 50[85000] -> 49[46000] via P2P/IPC +gpub079:2657934:2658007 [2] NCCL INFO Connected all trees +gpub079:2657934:2658007 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub079:2657934:2658007 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub079:2657934:2658007 [2] NCCL INFO comm 0x505ec9b0 rank 50 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub015:879782:879782 [2] NCCL INFO cudaDriverVersion 12010 +gpub015:879782:879782 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.115<0> +gpub015:879782:879782 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub015:879782:879850 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.115<0> +gpub015:879782:879850 [2] NCCL INFO Using network IB +gpub015:879782:879850 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub015:879782:879850 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1 +gpub015:879782:879850 [2] NCCL INFO Channel 00/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub015:879782:879850 
[2] NCCL INFO Channel 01/0 : 2[85000] -> 3[c7000] via P2P/IPC +gpub015:879782:879850 [2] NCCL INFO Connected all rings +gpub015:879782:879850 [2] NCCL INFO Channel 00/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub015:879782:879850 [2] NCCL INFO Channel 01/0 : 2[85000] -> 1[46000] via P2P/IPC +gpub015:879782:879850 [2] NCCL INFO Connected all trees +gpub015:879782:879850 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub015:879782:879850 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub015:879782:879850 [2] NCCL INFO comm 0x502ad7c0 rank 2 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub036:1870498:1870498 [2] NCCL INFO cudaDriverVersion 12010 +gpub036:1870498:1870498 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.136<0> +gpub036:1870498:1870498 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub036:1870498:1870579 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.136<0> +gpub036:1870498:1870579 [2] NCCL INFO Using network IB +gpub036:1870498:1870579 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub036:1870498:1870579 [2] NCCL INFO Trees [0] 19/-1/-1->18->17 [1] 19/-1/-1->18->17 +gpub036:1870498:1870579 [2] NCCL INFO Channel 00/0 : 18[85000] -> 19[c7000] via P2P/IPC +gpub036:1870498:1870579 [2] NCCL INFO Channel 01/0 : 18[85000] -> 19[c7000] via P2P/IPC +gpub036:1870498:1870579 [2] NCCL INFO Connected all rings +gpub036:1870498:1870579 [2] NCCL INFO Channel 00/0 : 18[85000] -> 17[46000] via P2P/IPC +gpub036:1870498:1870579 [2] NCCL INFO Channel 01/0 : 18[85000] -> 17[46000] via P2P/IPC +gpub036:1870498:1870579 [2] NCCL INFO Connected all trees +gpub036:1870498:1870579 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub036:1870498:1870579 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub036:1870498:1870579 [2] NCCL INFO comm 0x50c66a10 rank 18 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub031:1921206:1921206 [2] NCCL INFO cudaDriverVersion 12010 +gpub031:1921206:1921206 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.131<0> +gpub031:1921206:1921206 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub031:1921206:1921288 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.131<0> +gpub031:1921206:1921288 [2] NCCL INFO Using network IB +gpub031:1921206:1921288 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub031:1921206:1921288 [2] NCCL INFO Trees [0] 11/-1/-1->10->9 [1] 11/-1/-1->10->9 +gpub031:1921206:1921288 [2] NCCL INFO Channel 00/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub031:1921206:1921288 [2] NCCL INFO Channel 01/0 : 10[85000] -> 11[c7000] via P2P/IPC +gpub031:1921206:1921288 [2] NCCL INFO Connected all rings +gpub031:1921206:1921288 [2] NCCL INFO Channel 00/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub031:1921206:1921288 [2] NCCL INFO Channel 01/0 : 10[85000] -> 9[46000] via P2P/IPC +gpub031:1921206:1921288 [2] NCCL INFO Connected all trees +gpub031:1921206:1921288 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub031:1921206:1921288 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub031:1921206:1921288 [2] NCCL INFO comm 0xc2e65190 rank 10 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub082:1518447:1518447 [2] NCCL INFO cudaDriverVersion 12010 +gpub082:1518447:1518447 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.182<0> +gpub082:1518447:1518447 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), 
using internal implementation +gpub082:1518447:1518526 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.182<0> +gpub082:1518447:1518526 [2] NCCL INFO Using network IB +gpub082:1518447:1518526 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub082:1518447:1518526 [2] NCCL INFO Trees [0] 63/-1/-1->62->61 [1] 63/-1/-1->62->61 +gpub082:1518447:1518526 [2] NCCL INFO Channel 00/0 : 62[85000] -> 63[c7000] via P2P/IPC +gpub082:1518447:1518526 [2] NCCL INFO Channel 01/0 : 62[85000] -> 63[c7000] via P2P/IPC +gpub082:1518447:1518526 [2] NCCL INFO Connected all rings +gpub082:1518447:1518526 [2] NCCL INFO Channel 00/0 : 62[85000] -> 61[46000] via P2P/IPC +gpub082:1518447:1518526 [2] NCCL INFO Channel 01/0 : 62[85000] -> 61[46000] via P2P/IPC +gpub082:1518447:1518526 [2] NCCL INFO Connected all trees +gpub082:1518447:1518526 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub082:1518447:1518526 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub082:1518447:1518526 [2] NCCL INFO comm 0xb6376a90 rank 62 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub026:2433084:2433084 [0] NCCL INFO cudaDriverVersion 12010 +gpub026:2433084:2433084 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.126<0> +gpub026:2433084:2433084 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub026:2433084:2433166 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.126<0> +gpub026:2433084:2433166 [0] NCCL INFO Using network IB +gpub026:2433084:2433166 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub026:2433084:2433166 [0] NCCL INFO Trees [0] 5/-1/-1->4->9 [1] 5/0/-1->4->12 +gpub026:2433084:2433166 [0] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0 +gpub026:2433084:2433166 [0] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0 +gpub026:2433084:2433166 [0] NCCL INFO Channel 00/0 : 4[7000] -> 5[46000] via P2P/IPC +gpub026:2433084:2433166 [0] NCCL INFO Channel 01/0 : 4[7000] -> 5[46000] via P2P/IPC +gpub026:2433084:2433166 [0] NCCL INFO Connected all rings +gpub026:2433084:2433166 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [receive] via NET/IB/0 +gpub026:2433084:2433166 [0] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [send] via NET/IB/0 +gpub026:2433084:2433166 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [send] via NET/IB/0 +gpub026:2433084:2433166 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [receive] via NET/IB/0 +gpub026:2433084:2433166 [0] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [receive] via NET/IB/0 +gpub026:2433084:2433166 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [send] via NET/IB/0 +gpub026:2433084:2433166 [0] NCCL INFO Connected all trees +gpub026:2433084:2433166 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub026:2433084:2433166 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub026:2433084:2433166 [0] NCCL INFO comm 0x4fe36690 rank 4 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub049:4064874:4064874 [0] NCCL INFO cudaDriverVersion 12010 +gpub049:4064874:4064874 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.149<0> +gpub049:4064874:4064874 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub049:4064874:4064942 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.149<0> +gpub049:4064874:4064942 [0] NCCL INFO Using network IB +gpub049:4064874:4064942 [0] NCCL INFO Setting affinity for GPU 0 to 
ffff0000,00000000 +gpub049:4064874:4064942 [0] NCCL INFO Trees [0] 25/28/-1->24->16 [1] 25/-1/-1->24->21 +gpub049:4064874:4064942 [0] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0 +gpub049:4064874:4064942 [0] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0 +gpub049:4064874:4064942 [0] NCCL INFO Channel 00/0 : 24[7000] -> 25[46000] via P2P/IPC +gpub049:4064874:4064942 [0] NCCL INFO Channel 01/0 : 24[7000] -> 25[46000] via P2P/IPC +gpub049:4064874:4064942 [0] NCCL INFO Connected all rings +gpub049:4064874:4064942 [0] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [receive] via NET/IB/0 +gpub049:4064874:4064942 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [send] via NET/IB/0 +gpub049:4064874:4064942 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [receive] via NET/IB/0 +gpub049:4064874:4064942 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [send] via NET/IB/0 +gpub049:4064874:4064942 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [receive] via NET/IB/0 +gpub049:4064874:4064942 [0] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [send] via NET/IB/0 +gpub049:4064874:4064942 [0] NCCL INFO Connected all trees +gpub049:4064874:4064942 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub049:4064874:4064942 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub049:4064874:4064942 [0] NCCL INFO comm 0x500f4c60 rank 24 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub082:1518446:1518446 [1] NCCL INFO cudaDriverVersion 12010 +gpub082:1518446:1518446 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.182<0> +gpub082:1518446:1518446 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub082:1518446:1518525 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.182<0> +gpub082:1518446:1518525 [1] NCCL INFO Using network IB +gpub082:1518446:1518525 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub082:1518446:1518525 [1] NCCL INFO Trees [0] 62/-1/-1->61->60 [1] 62/-1/-1->61->60 +gpub082:1518446:1518525 [1] NCCL INFO Channel 00/0 : 61[46000] -> 62[85000] via P2P/IPC +gpub082:1518446:1518525 [1] NCCL INFO Channel 01/0 : 61[46000] -> 62[85000] via P2P/IPC +gpub082:1518446:1518525 [1] NCCL INFO Connected all rings +gpub082:1518446:1518525 [1] NCCL INFO Channel 00/0 : 61[46000] -> 60[7000] via P2P/IPC +gpub082:1518446:1518525 [1] NCCL INFO Channel 01/0 : 61[46000] -> 60[7000] via P2P/IPC +gpub082:1518446:1518525 [1] NCCL INFO Connected all trees +gpub082:1518446:1518525 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub082:1518446:1518525 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub082:1518446:1518525 [1] NCCL INFO comm 0xb6caaae0 rank 61 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub036:1870496:1870496 [0] NCCL INFO cudaDriverVersion 12010 +gpub036:1870496:1870496 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.136<0> +gpub036:1870496:1870496 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub036:1870496:1870578 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.136<0> +gpub036:1870496:1870578 [0] NCCL INFO Using network IB +gpub036:1870496:1870578 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub036:1870496:1870578 [0] NCCL INFO Trees [0] 17/24/-1->16->33 [1] 17/-1/-1->16->20 +gpub036:1870496:1870578 [0] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0 +gpub036:1870496:1870578 [0] NCCL INFO 
Channel 01/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0 +gpub036:1870496:1870578 [0] NCCL INFO Channel 00/0 : 16[7000] -> 17[46000] via P2P/IPC +gpub036:1870496:1870578 [0] NCCL INFO Channel 01/0 : 16[7000] -> 17[46000] via P2P/IPC +gpub036:1870496:1870578 [0] NCCL INFO Connected all rings +gpub036:1870496:1870578 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [send] via NET/IB/0 +gpub036:1870496:1870578 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [send] via NET/IB/0 +gpub036:1870496:1870578 [0] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [send] via NET/IB/0 +gpub036:1870496:1870578 [0] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [receive] via NET/IB/0 +gpub036:1870496:1870578 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [receive] via NET/IB/0 +gpub036:1870496:1870578 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [receive] via NET/IB/0 +gpub036:1870496:1870578 [0] NCCL INFO Connected all trees +gpub036:1870496:1870578 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub036:1870496:1870578 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub036:1870496:1870578 [0] NCCL INFO comm 0xad17bd0 rank 16 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub081:2742227:2742227 [0] NCCL INFO cudaDriverVersion 12010 +gpub081:2742227:2742227 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.181<0> +gpub081:2742227:2742227 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub081:2742227:2742317 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.181<0> +gpub081:2742227:2742317 [0] NCCL INFO Using network IB +gpub081:2742227:2742317 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub081:2742227:2742317 [0] NCCL INFO Trees [0] 57/60/-1->56->48 [1] 57/-1/-1->56->53 +gpub081:2742227:2742317 [0] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0 +gpub081:2742227:2742317 [0] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0 +gpub081:2742227:2742317 [0] NCCL INFO Channel 00/0 : 56[7000] -> 57[46000] via P2P/IPC +gpub081:2742227:2742317 [0] NCCL INFO Channel 01/0 : 56[7000] -> 57[46000] via P2P/IPC +gpub081:2742227:2742317 [0] NCCL INFO Connected all rings +gpub026:2433087:2433087 [3] NCCL INFO cudaDriverVersion 12010 +gpub026:2433087:2433087 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.126<0> +gpub026:2433087:2433087 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub026:2433087:2433163 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.126<0> +gpub026:2433087:2433163 [3] NCCL INFO Using network IB +gpub026:2433087:2433163 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub026:2433087:2433163 [3] NCCL INFO Trees [0] -1/-1/-1->7->6 [1] -1/-1/-1->7->6 +gpub026:2433087:2433163 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0 +gpub026:2433087:2433163 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0 +gpub026:2433087:2433163 [3] NCCL INFO Connected all rings +gpub026:2433087:2433163 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 6[85000] via P2P/IPC +gpub026:2433087:2433163 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 6[85000] via P2P/IPC +gpub081:2742227:2742317 [0] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [receive] via NET/IB/0 +gpub081:2742227:2742317 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [send] via NET/IB/0 +gpub081:2742227:2742317 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [receive] via NET/IB/0 
+gpub081:2742227:2742317 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [send] via NET/IB/0 +gpub081:2742227:2742317 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [receive] via NET/IB/0 +gpub081:2742227:2742317 [0] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [send] via NET/IB/0 +gpub081:2742227:2742317 [0] NCCL INFO Connected all trees +gpub081:2742227:2742317 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub081:2742227:2742317 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub081:2742227:2742317 [0] NCCL INFO comm 0x518b4950 rank 56 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub026:2433087:2433163 [3] NCCL INFO Connected all trees +gpub026:2433087:2433163 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub026:2433087:2433163 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub026:2433087:2433163 [3] NCCL INFO comm 0x50347080 rank 7 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub051:2913624:2913624 [1] NCCL INFO cudaDriverVersion 12010 +gpub051:2913624:2913624 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.151<0> +gpub051:2913624:2913624 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub051:2913624:2913707 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.151<0> +gpub051:2913624:2913707 [1] NCCL INFO Using network IB +gpub051:2913624:2913707 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub051:2913624:2913707 [1] NCCL INFO Trees [0] 34/16/-1->33->32 [1] 34/-1/-1->33->32 +gpub051:2913624:2913707 [1] NCCL INFO Channel 00/0 : 33[46000] -> 34[85000] via P2P/IPC +gpub051:2913624:2913707 [1] NCCL INFO Channel 01/0 : 33[46000] -> 34[85000] via P2P/IPC +gpub051:2913624:2913707 [1] NCCL INFO Connected all rings +gpub051:2913624:2913707 [1] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [receive] via NET/IB/0 +gpub051:2913624:2913707 [1] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [send] via NET/IB/0 +gpub051:2913624:2913707 [1] NCCL INFO Channel 00/0 : 33[46000] -> 32[7000] via P2P/IPC +gpub051:2913624:2913707 [1] NCCL INFO Channel 01/0 : 33[46000] -> 32[7000] via P2P/IPC +gpub051:2913624:2913707 [1] NCCL INFO Connected all trees +gpub051:2913624:2913707 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub051:2913624:2913707 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub051:2913624:2913707 [1] NCCL INFO comm 0xbb329750 rank 33 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub052:1901670:1901670 [3] NCCL INFO cudaDriverVersion 12010 +gpub052:1901670:1901670 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.152<0> +gpub052:1901670:1901670 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub052:1901670:1901750 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.152<0> +gpub052:1901670:1901750 [3] NCCL INFO Using network IB +gpub052:1901670:1901750 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub052:1901670:1901750 [3] NCCL INFO Trees [0] -1/-1/-1->39->38 [1] -1/-1/-1->39->38 +gpub052:1901670:1901750 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0 +gpub052:1901670:1901750 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0 +gpub052:1901670:1901750 [3] NCCL INFO Connected all rings +gpub052:1901670:1901750 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 38[85000] via P2P/IPC +gpub052:1901670:1901750 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 38[85000] via P2P/IPC 
+gpub052:1901670:1901750 [3] NCCL INFO Connected all trees +gpub052:1901670:1901750 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub052:1901670:1901750 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub052:1901670:1901750 [3] NCCL INFO comm 0xb6ced700 rank 39 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub036:1870497:1870497 [1] NCCL INFO cudaDriverVersion 12010 +gpub036:1870497:1870497 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.136<0> +gpub036:1870497:1870497 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub036:1870497:1870580 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.136<0> +gpub036:1870497:1870580 [1] NCCL INFO Using network IB +gpub036:1870497:1870580 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub036:1870497:1870580 [1] NCCL INFO Trees [0] 18/8/-1->17->16 [1] 18/-1/-1->17->16 +gpub036:1870497:1870580 [1] NCCL INFO Channel 00/0 : 17[46000] -> 18[85000] via P2P/IPC +gpub036:1870497:1870580 [1] NCCL INFO Channel 01/0 : 17[46000] -> 18[85000] via P2P/IPC +gpub036:1870497:1870580 [1] NCCL INFO Connected all rings +gpub036:1870497:1870580 [1] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [receive] via NET/IB/0 +gpub036:1870497:1870580 [1] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [send] via NET/IB/0 +gpub036:1870497:1870580 [1] NCCL INFO Channel 00/0 : 17[46000] -> 16[7000] via P2P/IPC +gpub036:1870497:1870580 [1] NCCL INFO Channel 01/0 : 17[46000] -> 16[7000] via P2P/IPC +gpub036:1870497:1870580 [1] NCCL INFO Connected all trees +gpub036:1870497:1870580 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub036:1870497:1870580 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub036:1870497:1870580 [1] NCCL INFO comm 0x4fcaadc0 rank 17 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub078:4170391:4170391 [2] NCCL INFO cudaDriverVersion 12010 +gpub078:4170391:4170391 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.178<0> +gpub078:4170391:4170391 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub078:4170391:4170469 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.178<0> +gpub078:4170391:4170469 [2] NCCL INFO Using network IB +gpub078:4170391:4170469 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub078:4170391:4170469 [2] NCCL INFO Trees [0] 47/-1/-1->46->45 [1] 47/-1/-1->46->45 +gpub078:4170391:4170469 [2] NCCL INFO Channel 00/0 : 46[85000] -> 47[c7000] via P2P/IPC +gpub078:4170391:4170469 [2] NCCL INFO Channel 01/0 : 46[85000] -> 47[c7000] via P2P/IPC +gpub078:4170391:4170469 [2] NCCL INFO Connected all rings +gpub078:4170391:4170469 [2] NCCL INFO Channel 00/0 : 46[85000] -> 45[46000] via P2P/IPC +gpub078:4170391:4170469 [2] NCCL INFO Channel 01/0 : 46[85000] -> 45[46000] via P2P/IPC +gpub078:4170391:4170469 [2] NCCL INFO Connected all trees +gpub078:4170391:4170469 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub078:4170391:4170469 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub078:4170391:4170469 [2] NCCL INFO comm 0x5187a990 rank 46 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub082:1518448:1518448 [3] NCCL INFO cudaDriverVersion 12010 +gpub082:1518448:1518448 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.182<0> +gpub082:1518448:1518448 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub082:1518448:1518524 [3] NCCL INFO NET/IB : 
Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.182<0> +gpub082:1518448:1518524 [3] NCCL INFO Using network IB +gpub082:1518448:1518524 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub082:1518448:1518524 [3] NCCL INFO Trees [0] -1/-1/-1->63->62 [1] -1/-1/-1->63->62 +gpub082:1518448:1518524 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0 +gpub082:1518448:1518524 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0 +gpub082:1518448:1518524 [3] NCCL INFO Connected all rings +gpub082:1518448:1518524 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 62[85000] via P2P/IPC +gpub082:1518448:1518524 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 62[85000] via P2P/IPC +gpub082:1518448:1518524 [3] NCCL INFO Connected all trees +gpub082:1518448:1518524 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub082:1518448:1518524 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub082:1518448:1518524 [3] NCCL INFO comm 0x8c5b6f90 rank 63 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub053:1664487:1664487 [1] NCCL INFO cudaDriverVersion 12010 +gpub053:1664487:1664487 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.153<0> +gpub053:1664487:1664487 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub053:1664487:1664558 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.153<0> +gpub053:1664487:1664558 [1] NCCL INFO Using network IB +gpub053:1664487:1664558 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub053:1664487:1664558 [1] NCCL INFO Trees [0] 42/36/-1->41->40 [1] 42/-1/-1->41->40 +gpub053:1664487:1664558 [1] NCCL INFO Channel 00/0 : 41[46000] -> 42[85000] via P2P/IPC +gpub053:1664487:1664558 [1] NCCL INFO Channel 01/0 : 41[46000] -> 42[85000] via P2P/IPC +gpub053:1664487:1664558 [1] NCCL INFO Connected all rings +gpub053:1664487:1664558 [1] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [receive] via NET/IB/0 +gpub053:1664487:1664558 [1] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [send] via NET/IB/0 +gpub053:1664487:1664558 [1] NCCL INFO Channel 00/0 : 41[46000] -> 40[7000] via P2P/IPC +gpub053:1664487:1664558 [1] NCCL INFO Channel 01/0 : 41[46000] -> 40[7000] via P2P/IPC +gpub053:1664487:1664558 [1] NCCL INFO Connected all trees +gpub053:1664487:1664558 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub053:1664487:1664558 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub053:1664487:1664558 [1] NCCL INFO comm 0x506110d0 rank 41 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub026:2433086:2433086 [2] NCCL INFO cudaDriverVersion 12010 +gpub026:2433086:2433086 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.126<0> +gpub026:2433086:2433086 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub026:2433086:2433164 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.126<0> +gpub026:2433086:2433164 [2] NCCL INFO Using network IB +gpub026:2433086:2433164 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub026:2433086:2433164 [2] NCCL INFO Trees [0] 7/-1/-1->6->5 [1] 7/-1/-1->6->5 +gpub026:2433086:2433164 [2] NCCL INFO Channel 00/0 : 6[85000] -> 7[c7000] via P2P/IPC +gpub026:2433086:2433164 [2] NCCL INFO Channel 01/0 : 6[85000] -> 7[c7000] via P2P/IPC +gpub026:2433086:2433164 [2] NCCL INFO Connected all rings +gpub026:2433086:2433164 [2] NCCL INFO Channel 00/0 : 6[85000] -> 5[46000] via P2P/IPC +gpub026:2433086:2433164 [2] NCCL INFO Channel 01/0 : 
6[85000] -> 5[46000] via P2P/IPC +gpub026:2433086:2433164 [2] NCCL INFO Connected all trees +gpub026:2433086:2433164 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub026:2433086:2433164 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub026:2433086:2433164 [2] NCCL INFO comm 0xc27df910 rank 6 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub080:4113205:4113205 [2] NCCL INFO cudaDriverVersion 12010 +gpub080:4113205:4113205 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.180<0> +gpub080:4113205:4113205 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub080:4113205:4113288 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.180<0> +gpub080:4113205:4113288 [2] NCCL INFO Using network IB +gpub080:4113205:4113288 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub080:4113205:4113288 [2] NCCL INFO Trees [0] 55/-1/-1->54->53 [1] 55/-1/-1->54->53 +gpub080:4113205:4113288 [2] NCCL INFO Channel 00/0 : 54[85000] -> 55[c7000] via P2P/IPC +gpub080:4113205:4113288 [2] NCCL INFO Channel 01/0 : 54[85000] -> 55[c7000] via P2P/IPC +gpub080:4113205:4113288 [2] NCCL INFO Connected all rings +gpub080:4113205:4113288 [2] NCCL INFO Channel 00/0 : 54[85000] -> 53[46000] via P2P/IPC +gpub080:4113205:4113288 [2] NCCL INFO Channel 01/0 : 54[85000] -> 53[46000] via P2P/IPC +gpub080:4113205:4113288 [2] NCCL INFO Connected all trees +gpub080:4113205:4113288 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub080:4113205:4113288 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub080:4113205:4113288 [2] NCCL INFO comm 0x50af0e00 rank 54 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub050:1879228:1879228 [3] NCCL INFO cudaDriverVersion 12010 +gpub050:1879228:1879228 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.150<0> +gpub050:1879228:1879228 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub050:1879228:1879302 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.150<0> +gpub050:1879228:1879302 [3] NCCL INFO Using network IB +gpub050:1879228:1879302 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub050:1879228:1879302 [3] NCCL INFO Trees [0] -1/-1/-1->31->30 [1] -1/-1/-1->31->30 +gpub050:1879228:1879302 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0 +gpub050:1879228:1879302 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0 +gpub050:1879228:1879302 [3] NCCL INFO Connected all rings +gpub050:1879228:1879302 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 30[85000] via P2P/IPC +gpub050:1879228:1879302 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 30[85000] via P2P/IPC +gpub050:1879228:1879302 [3] NCCL INFO Connected all trees +gpub050:1879228:1879302 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub050:1879228:1879302 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub050:1879228:1879302 [3] NCCL INFO comm 0x5177dae0 rank 31 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub081:2742230:2742230 [3] NCCL INFO cudaDriverVersion 12010 +gpub081:2742230:2742230 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.181<0> +gpub081:2742230:2742230 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub081:2742230:2742314 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.181<0> +gpub081:2742230:2742314 [3] NCCL INFO Using network IB +gpub081:2742230:2742314 [3] NCCL INFO 
Setting affinity for GPU 3 to ffff +gpub081:2742230:2742314 [3] NCCL INFO Trees [0] -1/-1/-1->59->58 [1] -1/-1/-1->59->58 +gpub081:2742230:2742314 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0 +gpub081:2742230:2742314 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0 +gpub081:2742230:2742314 [3] NCCL INFO Connected all rings +gpub081:2742230:2742314 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 58[85000] via P2P/IPC +gpub081:2742230:2742314 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 58[85000] via P2P/IPC +gpub081:2742230:2742314 [3] NCCL INFO Connected all trees +gpub081:2742230:2742314 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub081:2742230:2742314 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub081:2742230:2742314 [3] NCCL INFO comm 0xba992be0 rank 59 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub053:1664488:1664488 [2] NCCL INFO cudaDriverVersion 12010 +gpub053:1664488:1664488 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.153<0> +gpub053:1664488:1664488 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub053:1664488:1664560 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.153<0> +gpub053:1664488:1664560 [2] NCCL INFO Using network IB +gpub053:1664488:1664560 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub053:1664488:1664560 [2] NCCL INFO Trees [0] 43/-1/-1->42->41 [1] 43/-1/-1->42->41 +gpub053:1664488:1664560 [2] NCCL INFO Channel 00/0 : 42[85000] -> 43[c7000] via P2P/IPC +gpub053:1664488:1664560 [2] NCCL INFO Channel 01/0 : 42[85000] -> 43[c7000] via P2P/IPC +gpub053:1664488:1664560 [2] NCCL INFO Connected all rings +gpub053:1664488:1664560 [2] NCCL INFO Channel 00/0 : 42[85000] -> 41[46000] via P2P/IPC +gpub053:1664488:1664560 [2] NCCL INFO Channel 01/0 : 42[85000] -> 41[46000] via P2P/IPC +gpub053:1664488:1664560 [2] NCCL INFO Connected all trees +gpub053:1664488:1664560 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub053:1664488:1664560 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub053:1664488:1664560 [2] NCCL INFO comm 0xe3027a0 rank 42 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub078:4170390:4170390 [1] NCCL INFO cudaDriverVersion 12010 +gpub078:4170390:4170390 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.178<0> +gpub078:4170390:4170390 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub078:4170390:4170468 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.178<0> +gpub078:4170390:4170468 [1] NCCL INFO Using network IB +gpub078:4170390:4170468 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub078:4170390:4170468 [1] NCCL INFO Trees [0] 46/-1/-1->45->44 [1] 46/52/-1->45->44 +gpub078:4170390:4170468 [1] NCCL INFO Channel 00/0 : 45[46000] -> 46[85000] via P2P/IPC +gpub078:4170390:4170468 [1] NCCL INFO Channel 01/0 : 45[46000] -> 46[85000] via P2P/IPC +gpub078:4170390:4170468 [1] NCCL INFO Connected all rings +gpub078:4170390:4170468 [1] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [send] via NET/IB/0 +gpub078:4170390:4170468 [1] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [receive] via NET/IB/0 +gpub078:4170390:4170468 [1] NCCL INFO Channel 00/0 : 45[46000] -> 44[7000] via P2P/IPC +gpub078:4170390:4170468 [1] NCCL INFO Channel 01/0 : 45[46000] -> 44[7000] via P2P/IPC +gpub078:4170390:4170468 [1] NCCL INFO Connected all trees +gpub078:4170390:4170468 [1] NCCL INFO 
threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub078:4170390:4170468 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub078:4170390:4170468 [1] NCCL INFO comm 0x1d97f440 rank 45 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub078:4170389:4170389 [0] NCCL INFO cudaDriverVersion 12010 +gpub078:4170389:4170389 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.178<0> +gpub078:4170389:4170389 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub078:4170389:4170470 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.178<0> +gpub078:4170389:4170470 [0] NCCL INFO Using network IB +gpub078:4170389:4170470 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub078:4170389:4170470 [0] NCCL INFO Trees [0] 45/-1/-1->44->40 [1] 45/36/-1->44->29 +gpub078:4170389:4170470 [0] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0 +gpub078:4170389:4170470 [0] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0 +gpub078:4170389:4170470 [0] NCCL INFO Channel 00/0 : 44[7000] -> 45[46000] via P2P/IPC +gpub078:4170389:4170470 [0] NCCL INFO Channel 01/0 : 44[7000] -> 45[46000] via P2P/IPC +gpub078:4170389:4170470 [0] NCCL INFO Connected all rings +gpub078:4170389:4170470 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [receive] via NET/IB/0 +gpub078:4170389:4170470 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [receive] via NET/IB/0 +gpub078:4170389:4170470 [0] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [receive] via NET/IB/0 +gpub078:4170389:4170470 [0] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [send] via NET/IB/0 +gpub078:4170389:4170470 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [send] via NET/IB/0 +gpub078:4170389:4170470 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [send] via NET/IB/0 +gpub078:4170389:4170470 [0] NCCL INFO Connected all trees +gpub078:4170389:4170470 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub078:4170389:4170470 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub078:4170389:4170470 [0] NCCL INFO comm 0x4f656710 rank 44 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub078:4170392:4170392 [3] NCCL INFO cudaDriverVersion 12010 +gpub078:4170392:4170392 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.178<0> +gpub078:4170392:4170392 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub078:4170392:4170471 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.178<0> +gpub078:4170392:4170471 [3] NCCL INFO Using network IB +gpub078:4170392:4170471 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub078:4170392:4170471 [3] NCCL INFO Trees [0] -1/-1/-1->47->46 [1] -1/-1/-1->47->46 +gpub078:4170392:4170471 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0 +gpub078:4170392:4170471 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0 +gpub078:4170392:4170471 [3] NCCL INFO Connected all rings +gpub078:4170392:4170471 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 46[85000] via P2P/IPC +gpub078:4170392:4170471 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 46[85000] via P2P/IPC +gpub078:4170392:4170471 [3] NCCL INFO Connected all trees +gpub078:4170392:4170471 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub078:4170392:4170471 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub078:4170392:4170471 [3] NCCL INFO comm 0x4f67f390 rank 47 nranks 64 cudaDev 3 busId c7000 - 
Init COMPLETE +gpub015:879780:879852 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.115<0> +gpub015:879780:879852 [0] NCCL INFO Using network IB +gpub015:879780:879852 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub015:879780:879852 [0] NCCL INFO Channel 00/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpub015:879780:879852 [0] NCCL INFO Channel 01/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpub015:879780:879852 [0] NCCL INFO Trees [0] 1/32/-1->0->-1 [1] 1/-1/-1->0->4 +gpub015:879780:879852 [0] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0 +gpub015:879780:879852 [0] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0 +gpub015:879780:879852 [0] NCCL INFO Channel 00/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub015:879780:879852 [0] NCCL INFO Channel 01/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub015:879780:879852 [0] NCCL INFO Connected all rings +gpub015:879780:879852 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [send] via NET/IB/0 +gpub015:879780:879852 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [receive] via NET/IB/0 +gpub015:879780:879852 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [send] via NET/IB/0 +gpub015:879780:879852 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [receive] via NET/IB/0 +gpub015:879780:879852 [0] NCCL INFO Connected all trees +gpub015:879780:879852 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub015:879780:879852 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub015:879780:879852 [0] NCCL INFO comm 0x51871d20 rank 0 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub015:879781:879781 [1] NCCL INFO cudaDriverVersion 12010 +gpub015:879781:879781 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.115<0> +gpub015:879781:879781 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub015:879781:879853 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.115<0> +gpub015:879781:879853 [1] NCCL INFO Using network IB +gpub015:879781:879853 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub015:879781:879853 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0 +gpub015:879781:879853 [1] NCCL INFO Channel 00/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub015:879781:879853 [1] NCCL INFO Channel 01/0 : 1[46000] -> 2[85000] via P2P/IPC +gpub015:879781:879853 [1] NCCL INFO Connected all rings +gpub015:879781:879853 [1] NCCL INFO Channel 00/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub015:879781:879853 [1] NCCL INFO Channel 01/0 : 1[46000] -> 0[7000] via P2P/IPC +gpub015:879781:879853 [1] NCCL INFO Connected all trees +gpub015:879781:879853 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub015:879781:879853 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub015:879781:879853 [1] NCCL INFO comm 0x8d09c1b0 rank 1 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub053:1664486:1664486 [0] NCCL INFO cudaDriverVersion 12010 +gpub053:1664486:1664486 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.153<0> +gpub053:1664486:1664486 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub053:1664486:1664557 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.153<0> +gpub053:1664486:1664557 [0] NCCL INFO Using network IB +gpub053:1664486:1664557 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub053:1664486:1664557 [0] NCCL INFO Trees [0] 41/44/-1->40->49 [1] 
41/-1/-1->40->37 +gpub053:1664486:1664557 [0] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0 +gpub053:1664486:1664557 [0] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0 +gpub053:1664486:1664557 [0] NCCL INFO Channel 00/0 : 40[7000] -> 41[46000] via P2P/IPC +gpub053:1664486:1664557 [0] NCCL INFO Channel 01/0 : 40[7000] -> 41[46000] via P2P/IPC +gpub053:1664486:1664557 [0] NCCL INFO Connected all rings +gpub053:1664486:1664557 [0] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [receive] via NET/IB/0 +gpub053:1664486:1664557 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [send] via NET/IB/0 +gpub053:1664486:1664557 [0] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [send] via NET/IB/0 +gpub053:1664486:1664557 [0] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [receive] via NET/IB/0 +gpub053:1664486:1664557 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [receive] via NET/IB/0 +gpub053:1664486:1664557 [0] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [send] via NET/IB/0 +gpub053:1664486:1664557 [0] NCCL INFO Connected all trees +gpub053:1664486:1664557 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub053:1664486:1664557 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub053:1664486:1664557 [0] NCCL INFO comm 0x4f7ecd60 rank 40 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub049:4064876:4064876 [2] NCCL INFO cudaDriverVersion 12010 +gpub049:4064876:4064876 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.149<0> +gpub049:4064876:4064876 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub049:4064876:4064939 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.149<0> +gpub049:4064876:4064939 [2] NCCL INFO Using network IB +gpub049:4064876:4064939 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub049:4064876:4064939 [2] NCCL INFO Trees [0] 27/-1/-1->26->25 [1] 27/-1/-1->26->25 +gpub049:4064876:4064939 [2] NCCL INFO Channel 00/0 : 26[85000] -> 27[c7000] via P2P/IPC +gpub049:4064876:4064939 [2] NCCL INFO Channel 01/0 : 26[85000] -> 27[c7000] via P2P/IPC +gpub049:4064876:4064939 [2] NCCL INFO Connected all rings +gpub049:4064876:4064939 [2] NCCL INFO Channel 00/0 : 26[85000] -> 25[46000] via P2P/IPC +gpub049:4064876:4064939 [2] NCCL INFO Channel 01/0 : 26[85000] -> 25[46000] via P2P/IPC +gpub049:4064876:4064939 [2] NCCL INFO Connected all trees +gpub049:4064876:4064939 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub049:4064876:4064939 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub049:4064876:4064939 [2] NCCL INFO comm 0xb89777d0 rank 26 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub032:3289603:3289603 [0] NCCL INFO cudaDriverVersion 12010 +gpub032:3289603:3289603 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.132<0> +gpub032:3289603:3289603 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub032:3289603:3289688 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.132<0> +gpub032:3289603:3289688 [0] NCCL INFO Using network IB +gpub032:3289603:3289688 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub032:3289603:3289688 [0] NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->28 +gpub032:3289603:3289688 [0] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0 +gpub032:3289603:3289688 [0] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0 +gpub032:3289603:3289688 [0] NCCL 
INFO Channel 00/0 : 12[7000] -> 13[46000] via P2P/IPC
+gpub032:3289603:3289688 [0] NCCL INFO Channel 01/0 : 12[7000] -> 13[46000] via P2P/IPC
+gpub032:3289603:3289688 [0] NCCL INFO Connected all rings
+gpub032:3289603:3289688 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [receive] via NET/IB/0
+gpub032:3289603:3289688 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [receive] via NET/IB/0
+gpub032:3289603:3289688 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [send] via NET/IB/0
+gpub032:3289603:3289688 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [receive] via NET/IB/0
+gpub032:3289603:3289688 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [send] via NET/IB/0
+gpub032:3289603:3289688 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [send] via NET/IB/0
+gpub032:3289603:3289688 [0] NCCL INFO Connected all trees
+gpub032:3289603:3289688 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub032:3289603:3289688 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub032:3289603:3289688 [0] NCCL INFO comm 0x9f95b40 rank 12 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub081:2742228:2742228 [1] NCCL INFO cudaDriverVersion 12010
+gpub081:2742228:2742228 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.181<0>
+gpub081:2742228:2742228 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub081:2742228:2742316 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.181<0>
+gpub081:2742228:2742316 [1] NCCL INFO Using network IB
+gpub081:2742228:2742316 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub081:2742228:2742316 [1] NCCL INFO Trees [0] 58/52/-1->57->56 [1] 58/-1/-1->57->56
+gpub081:2742228:2742316 [1] NCCL INFO Channel 00/0 : 57[46000] -> 58[85000] via P2P/IPC
+gpub081:2742228:2742316 [1] NCCL INFO Channel 01/0 : 57[46000] -> 58[85000] via P2P/IPC
+gpub081:2742228:2742316 [1] NCCL INFO Connected all rings
+gpub081:2742228:2742316 [1] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [receive] via NET/IB/0
+gpub081:2742228:2742316 [1] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [send] via NET/IB/0
+gpub081:2742228:2742316 [1] NCCL INFO Channel 00/0 : 57[46000] -> 56[7000] via P2P/IPC
+gpub081:2742228:2742316 [1] NCCL INFO Channel 01/0 : 57[46000] -> 56[7000] via P2P/IPC
+gpub081:2742228:2742316 [1] NCCL INFO Connected all trees
+gpub081:2742228:2742316 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub081:2742228:2742316 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub081:2742228:2742316 [1] NCCL INFO comm 0xb78a1250 rank 57 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub037:1522723:1522723 [1] NCCL INFO cudaDriverVersion 12010
+gpub037:1522723:1522723 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.137<0>
+gpub037:1522723:1522723 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub037:1522723:1522803 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.137<0>
+gpub037:1522723:1522803 [1] NCCL INFO Using network IB
+gpub037:1522723:1522803 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub037:1522723:1522803 [1] NCCL INFO Trees [0] 22/-1/-1->21->20 [1] 22/24/-1->21->20
+gpub037:1522723:1522803 [1] NCCL INFO Channel 00/0 : 21[46000] -> 22[85000] via P2P/IPC
+gpub037:1522723:1522803 [1] NCCL INFO Channel 01/0 : 21[46000] -> 22[85000] via P2P/IPC
+gpub037:1522723:1522803 [1] NCCL INFO Connected all rings
+gpub037:1522723:1522803 [1] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [send] via NET/IB/0
+gpub037:1522723:1522803 [1] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [receive] via NET/IB/0
+gpub037:1522723:1522803 [1] NCCL INFO Channel 00/0 : 21[46000] -> 20[7000] via P2P/IPC
+gpub037:1522723:1522803 [1] NCCL INFO Channel 01/0 : 21[46000] -> 20[7000] via P2P/IPC
+gpub037:1522723:1522803 [1] NCCL INFO Connected all trees
+gpub037:1522723:1522803 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub037:1522723:1522803 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub037:1522723:1522803 [1] NCCL INFO comm 0xba5d23a0 rank 21 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub050:1879225:1879225 [0] NCCL INFO cudaDriverVersion 12010
+gpub050:1879225:1879225 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.150<0>
+gpub050:1879225:1879225 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub050:1879225:1879303 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.150<0>
+gpub050:1879225:1879303 [0] NCCL INFO Using network IB
+gpub050:1879225:1879303 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub050:1879225:1879303 [0] NCCL INFO Trees [0] 29/-1/-1->28->24 [1] 29/12/-1->28->60
+gpub050:1879225:1879303 [0] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0
+gpub050:1879225:1879303 [0] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0
+gpub050:1879225:1879303 [0] NCCL INFO Channel 00/0 : 28[7000] -> 29[46000] via P2P/IPC
+gpub050:1879225:1879303 [0] NCCL INFO Channel 01/0 : 28[7000] -> 29[46000] via P2P/IPC
+gpub050:1879225:1879303 [0] NCCL INFO Connected all rings
+gpub050:1879225:1879303 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [receive] via NET/IB/0
+gpub050:1879225:1879303 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [receive] via NET/IB/0
+gpub050:1879225:1879303 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [receive] via NET/IB/0
+gpub050:1879225:1879303 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [send] via NET/IB/0
+gpub050:1879225:1879303 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [send] via NET/IB/0
+gpub050:1879225:1879303 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [send] via NET/IB/0
+gpub050:1879225:1879303 [0] NCCL INFO Connected all trees
+gpub050:1879225:1879303 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub050:1879225:1879303 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub050:1879225:1879303 [0] NCCL INFO comm 0xa81f9440 rank 28 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub052:1901669:1901669 [2] NCCL INFO cudaDriverVersion 12010
+gpub052:1901669:1901669 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.152<0>
+gpub052:1901669:1901669 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub052:1901669:1901751 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.152<0>
+gpub052:1901669:1901751 [2] NCCL INFO Using network IB
+gpub052:1901669:1901751 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub052:1901669:1901751 [2] NCCL INFO Trees [0] 39/-1/-1->38->37 [1] 39/-1/-1->38->37
+gpub052:1901669:1901751 [2] NCCL INFO Channel 00/0 : 38[85000] -> 39[c7000] via P2P/IPC
+gpub052:1901669:1901751 [2] NCCL INFO Channel 01/0 : 38[85000] -> 39[c7000] via P2P/IPC
+gpub052:1901669:1901751 [2] NCCL INFO Connected all rings
+gpub052:1901669:1901751 [2] NCCL INFO Channel 00/0 : 38[85000] -> 37[46000] via P2P/IPC
+gpub052:1901669:1901751 [2] NCCL INFO Channel 01/0 : 38[85000] -> 37[46000] via P2P/IPC
+gpub052:1901669:1901751 [2] NCCL INFO Connected all trees
+gpub052:1901669:1901751 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub052:1901669:1901751 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub052:1901669:1901751 [2] NCCL INFO comm 0x50c05250 rank 38 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub037:1522724:1522724 [2] NCCL INFO cudaDriverVersion 12010
+gpub037:1522724:1522724 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.137<0>
+gpub037:1522724:1522724 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub037:1522724:1522800 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.137<0>
+gpub037:1522724:1522800 [2] NCCL INFO Using network IB
+gpub037:1522724:1522800 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub037:1522724:1522800 [2] NCCL INFO Trees [0] 23/-1/-1->22->21 [1] 23/-1/-1->22->21
+gpub037:1522724:1522800 [2] NCCL INFO Channel 00/0 : 22[85000] -> 23[c7000] via P2P/IPC
+gpub037:1522724:1522800 [2] NCCL INFO Channel 01/0 : 22[85000] -> 23[c7000] via P2P/IPC
+gpub037:1522724:1522800 [2] NCCL INFO Connected all rings
+gpub037:1522724:1522800 [2] NCCL INFO Channel 00/0 : 22[85000] -> 21[46000] via P2P/IPC
+gpub037:1522724:1522800 [2] NCCL INFO Channel 01/0 : 22[85000] -> 21[46000] via P2P/IPC
+gpub037:1522724:1522800 [2] NCCL INFO Connected all trees
+gpub037:1522724:1522800 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub037:1522724:1522800 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub037:1522724:1522800 [2] NCCL INFO comm 0xab8ed350 rank 22 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub082:1518445:1518445 [0] NCCL INFO cudaDriverVersion 12010
+gpub082:1518445:1518445 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.182<0>
+gpub082:1518445:1518445 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub082:1518445:1518527 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.182<0>
+gpub082:1518445:1518527 [0] NCCL INFO Using network IB
+gpub082:1518445:1518527 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub082:1518445:1518527 [0] NCCL INFO Trees [0] 61/-1/-1->60->56 [1] 61/28/-1->60->-1
+gpub082:1518445:1518527 [0] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0
+gpub082:1518445:1518527 [0] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0
+gpub082:1518445:1518527 [0] NCCL INFO Channel 00/0 : 60[7000] -> 61[46000] via P2P/IPC
+gpub082:1518445:1518527 [0] NCCL INFO Channel 01/0 : 60[7000] -> 61[46000] via P2P/IPC
+gpub082:1518445:1518527 [0] NCCL INFO Connected all rings
+gpub082:1518445:1518527 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [receive] via NET/IB/0
+gpub082:1518445:1518527 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [receive] via NET/IB/0
+gpub082:1518445:1518527 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [send] via NET/IB/0
+gpub082:1518445:1518527 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [send] via NET/IB/0
+gpub082:1518445:1518527 [0] NCCL INFO Connected all trees
+gpub082:1518445:1518527 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub082:1518445:1518527 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub082:1518445:1518527 [0] NCCL INFO comm 0x519aa9d0 rank 60 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub081:2742229:2742229 [2] NCCL INFO cudaDriverVersion 12010
+gpub081:2742229:2742229 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.181<0>
+gpub081:2742229:2742229 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub081:2742229:2742315 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.181<0>
+gpub081:2742229:2742315 [2] NCCL INFO Using network IB
+gpub081:2742229:2742315 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub081:2742229:2742315 [2] NCCL INFO Trees [0] 59/-1/-1->58->57 [1] 59/-1/-1->58->57
+gpub081:2742229:2742315 [2] NCCL INFO Channel 00/0 : 58[85000] -> 59[c7000] via P2P/IPC
+gpub081:2742229:2742315 [2] NCCL INFO Channel 01/0 : 58[85000] -> 59[c7000] via P2P/IPC
+gpub081:2742229:2742315 [2] NCCL INFO Connected all rings
+gpub081:2742229:2742315 [2] NCCL INFO Channel 00/0 : 58[85000] -> 57[46000] via P2P/IPC
+gpub081:2742229:2742315 [2] NCCL INFO Channel 01/0 : 58[85000] -> 57[46000] via P2P/IPC
+gpub081:2742229:2742315 [2] NCCL INFO Connected all trees
+gpub081:2742229:2742315 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub081:2742229:2742315 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub081:2742229:2742315 [2] NCCL INFO comm 0x50f92c00 rank 58 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub026:2433085:2433085 [1] NCCL INFO cudaDriverVersion 12010
+gpub026:2433085:2433085 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.126<0>
+gpub026:2433085:2433085 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub026:2433085:2433165 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.126<0>
+gpub026:2433085:2433165 [1] NCCL INFO Using network IB
+gpub026:2433085:2433165 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub026:2433085:2433165 [1] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 6/8/-1->5->4
+gpub026:2433085:2433165 [1] NCCL INFO Channel 00/0 : 5[46000] -> 6[85000] via P2P/IPC
+gpub026:2433085:2433165 [1] NCCL INFO Channel 01/0 : 5[46000] -> 6[85000] via P2P/IPC
+gpub026:2433085:2433165 [1] NCCL INFO Connected all rings
+gpub026:2433085:2433165 [1] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [send] via NET/IB/0
+gpub026:2433085:2433165 [1] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [receive] via NET/IB/0
+gpub026:2433085:2433165 [1] NCCL INFO Channel 00/0 : 5[46000] -> 4[7000] via P2P/IPC
+gpub026:2433085:2433165 [1] NCCL INFO Channel 01/0 : 5[46000] -> 4[7000] via P2P/IPC
+gpub026:2433085:2433165 [1] NCCL INFO Connected all trees
+gpub026:2433085:2433165 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub026:2433085:2433165 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub026:2433085:2433165 [1] NCCL INFO comm 0xb7dab990 rank 5 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub049:4064875:4064875 [1] NCCL INFO cudaDriverVersion 12010
+gpub049:4064875:4064875 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.149<0>
+gpub049:4064875:4064875 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub049:4064875:4064940 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.149<0>
+gpub049:4064875:4064940 [1] NCCL INFO Using network IB
+gpub049:4064875:4064940 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub049:4064875:4064940 [1] NCCL INFO Trees [0] 26/20/-1->25->24 [1] 26/-1/-1->25->24
+gpub049:4064875:4064940 [1] NCCL INFO Channel 00/0 : 25[46000] -> 26[85000] via P2P/IPC
+gpub049:4064875:4064940 [1] NCCL INFO Channel 01/0 : 25[46000] -> 26[85000] via P2P/IPC
+gpub049:4064875:4064940 [1] NCCL INFO Connected all rings
+gpub049:4064875:4064940 [1] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [receive] via NET/IB/0
+gpub049:4064875:4064940 [1] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [send] via NET/IB/0
+gpub049:4064875:4064940 [1] NCCL INFO Channel 00/0 : 25[46000] -> 24[7000] via P2P/IPC
+gpub049:4064875:4064940 [1] NCCL INFO Channel 01/0 : 25[46000] -> 24[7000] via P2P/IPC
+gpub049:4064875:4064940 [1] NCCL INFO Connected all trees
+gpub049:4064875:4064940 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub049:4064875:4064940 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub049:4064875:4064940 [1] NCCL INFO comm 0xa8769be0 rank 25 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub037:1522722:1522722 [0] NCCL INFO cudaDriverVersion 12010
+gpub037:1522722:1522722 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.137<0>
+gpub037:1522722:1522722 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub037:1522722:1522802 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.137<0>
+gpub037:1522722:1522802 [0] NCCL INFO Using network IB
+gpub037:1522722:1522802 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub037:1522722:1522802 [0] NCCL INFO Trees [0] 21/-1/-1->20->25 [1] 21/16/-1->20->13
+gpub037:1522722:1522802 [0] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0
+gpub037:1522722:1522802 [0] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0
+gpub037:1522722:1522802 [0] NCCL INFO Channel 00/0 : 20[7000] -> 21[46000] via P2P/IPC
+gpub037:1522722:1522802 [0] NCCL INFO Channel 01/0 : 20[7000] -> 21[46000] via P2P/IPC
+gpub037:1522722:1522802 [0] NCCL INFO Connected all rings
+gpub037:1522722:1522802 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [receive] via NET/IB/0
+gpub037:1522722:1522802 [0] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [send] via NET/IB/0
+gpub037:1522722:1522802 [0] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [receive] via NET/IB/0
+gpub037:1522722:1522802 [0] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [send] via NET/IB/0
+gpub037:1522722:1522802 [0] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [receive] via NET/IB/0
+gpub037:1522722:1522802 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [send] via NET/IB/0
+gpub037:1522722:1522802 [0] NCCL INFO Connected all trees
+gpub037:1522722:1522802 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub037:1522722:1522802 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub037:1522722:1522802 [0] NCCL INFO comm 0x514cae40 rank 20 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub053:1664489:1664489 [3] NCCL INFO cudaDriverVersion 12010
+gpub053:1664489:1664489 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.153<0>
+gpub053:1664489:1664489 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub053:1664489:1664559 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.153<0>
+gpub053:1664489:1664559 [3] NCCL INFO Using network IB
+gpub053:1664489:1664559 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub053:1664489:1664559 [3] NCCL INFO Trees [0] -1/-1/-1->43->42 [1] -1/-1/-1->43->42
+gpub053:1664489:1664559 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0
+gpub053:1664489:1664559 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0
+gpub053:1664489:1664559 [3] NCCL INFO Connected all rings
+gpub053:1664489:1664559 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 42[85000] via P2P/IPC
+gpub053:1664489:1664559 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 42[85000] via P2P/IPC
+gpub053:1664489:1664559 [3] NCCL INFO Connected all trees
+gpub053:1664489:1664559 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub053:1664489:1664559 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub053:1664489:1664559 [3] NCCL INFO comm 0xa9e28fe0 rank 43 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub037:1522725:1522725 [3] NCCL INFO cudaDriverVersion 12010
+gpub037:1522725:1522725 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.137<0>
+gpub037:1522725:1522725 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub037:1522725:1522801 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.137<0>
+gpub037:1522725:1522801 [3] NCCL INFO Using network IB
+gpub037:1522725:1522801 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub037:1522725:1522801 [3] NCCL INFO Trees [0] -1/-1/-1->23->22 [1] -1/-1/-1->23->22
+gpub037:1522725:1522801 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0
+gpub037:1522725:1522801 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0
+gpub037:1522725:1522801 [3] NCCL INFO Connected all rings
+gpub037:1522725:1522801 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 22[85000] via P2P/IPC
+gpub037:1522725:1522801 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 22[85000] via P2P/IPC
+gpub037:1522725:1522801 [3] NCCL INFO Connected all trees
+gpub037:1522725:1522801 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub037:1522725:1522801 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub037:1522725:1522801 [3] NCCL INFO comm 0x4f7df910 rank 23 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub036:1870499:1870499 [3] NCCL INFO cudaDriverVersion 12010
+gpub036:1870499:1870499 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.136<0>
+gpub036:1870499:1870499 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub036:1870499:1870581 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.136<0>
+gpub036:1870499:1870581 [3] NCCL INFO Using network IB
+gpub036:1870499:1870581 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub036:1870499:1870581 [3] NCCL INFO Trees [0] -1/-1/-1->19->18 [1] -1/-1/-1->19->18
+gpub036:1870499:1870581 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0
+gpub036:1870499:1870581 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0
+gpub036:1870499:1870581 [3] NCCL INFO Connected all rings
+gpub036:1870499:1870581 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 18[85000] via P2P/IPC
+gpub036:1870499:1870581 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 18[85000] via P2P/IPC
+gpub036:1870499:1870581 [3] NCCL INFO Connected all trees
+gpub036:1870499:1870581 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub036:1870499:1870581 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub036:1870499:1870581 [3] NCCL INFO comm 0xa269c50 rank 19 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
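The NCCL Bootstrap/NET/Trees/Channel messages above typically appear because NCCL debug logging is enabled. As a point of reference only, a minimal Python sketch of how such logging is usually switched on; the variables must be set before the first communicator is created (e.g. before torch.distributed.init_process_group), and the NCCL_DEBUG_SUBSYS value is an illustrative subset, not taken from this run:

    import os

    # Enable NCCL's own logging; "INFO" produces per-rank init/topology
    # messages like the ones above.
    os.environ["NCCL_DEBUG"] = "INFO"
    # Optionally restrict logging to particular subsystems (illustrative).
    os.environ["NCCL_DEBUG_SUBSYS"] = "INIT,NET"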
+[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
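For reference, the flag this warning refers to is an argument of the torch.nn.parallel.DistributedDataParallel constructor. A minimal sketch, assuming an already-initialized process group (as set up via srun in this run); the wrap_model helper and the LOCAL_RANK handling are illustrative, not ESPnet's actual trainer code:

    import os

    import torch
    import torch.distributed as dist
    from torch.nn.parallel import DistributedDataParallel as DDP

    def wrap_model(model: torch.nn.Module) -> DDP:
        # Assumes dist.init_process_group() has already run on every rank.
        assert dist.is_initialized()
        local_rank = int(os.environ.get("LOCAL_RANK", "0"))
        torch.cuda.set_device(local_rank)
        return DDP(
            model.cuda(local_rank),
            device_ids=[local_rank],
            # The warning above fires when this is True yet no unused
            # parameters are ever found; False skips the extra per-iteration
            # traversal of the autograd graph.
            find_unused_parameters=False,
        )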
+[gpub015:0/64] 2023-07-04 13:14:42,343 (trainer:732) INFO: 11epoch:train:1-100batch: iter_time=1.167, forward_time=0.250, loss_ctc=83.800, loss_att=63.903, acc=0.671, loss=69.872, backward_time=1.249, grad_norm=95.018, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.181, optim0_lr0=1.178e-04, train_time=5.980
+[gpub015:0/64] 2023-07-04 13:17:19,812 (trainer:732) INFO: 11epoch:train:101-200batch: iter_time=9.388e-05, forward_time=0.141, loss_ctc=70.875, loss_att=53.678, acc=0.690, loss=58.837, backward_time=1.239, grad_norm=82.003, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.178, optim0_lr0=1.178e-04, train_time=3.150
+[gpub015:0/64] 2023-07-04 13:19:56,533 (trainer:732) INFO: 11epoch:train:201-300batch: iter_time=9.683e-05, forward_time=0.140, loss_ctc=73.277, loss_att=62.501, acc=0.661, loss=65.734, backward_time=1.236, grad_norm=87.548, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.178, optim0_lr0=1.177e-04, train_time=3.134
+[gpub015:0/64] 2023-07-04 13:22:35,853 (trainer:732) INFO: 11epoch:train:301-400batch: iter_time=1.014e-04, forward_time=0.141, loss_ctc=79.358, loss_att=59.074, acc=0.690, loss=65.159, backward_time=1.241, grad_norm=91.769, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.178, optim0_lr0=1.176e-04, train_time=3.186
+[gpub015:0/64] 2023-07-04 13:25:20,103 (trainer:732) INFO: 11epoch:train:401-500batch: iter_time=1.025e-04, forward_time=0.140, loss_ctc=72.801, loss_att=54.986, acc=0.664, loss=60.331, backward_time=1.248, grad_norm=99.444, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.178, optim0_lr0=1.176e-04, train_time=3.285
+[gpub015:0/64] 2023-07-04 13:28:00,155 (trainer:732) INFO: 11epoch:train:501-600batch: iter_time=9.845e-05, forward_time=0.140, loss_ctc=72.472, loss_att=58.148, acc=0.641, loss=62.446, backward_time=1.241, grad_norm=85.575, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.178, optim0_lr0=1.175e-04, train_time=3.201
+[gpub015:0/64] 2023-07-04 13:30:47,766 (trainer:732) INFO: 11epoch:train:601-700batch: iter_time=1.032e-04, forward_time=0.142, loss_ctc=74.485, loss_att=62.987, acc=0.667, loss=66.436, backward_time=1.247, grad_norm=81.333, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.178, optim0_lr0=1.174e-04, train_time=3.352
+[gpub015:0/64] 2023-07-04 13:33:37,533 (trainer:732) INFO: 11epoch:train:701-800batch: iter_time=1.023e-04, forward_time=0.140, loss_ctc=81.733, loss_att=62.947, acc=0.691, loss=68.583, backward_time=1.245, grad_norm=94.965, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.178, optim0_lr0=1.174e-04, train_time=3.395
+[gpub015:0/64] 2023-07-04 13:34:38,137 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
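The trainer:732 lines follow a regular "key=value, key=value" format after the NNepoch:train:A-Bbatch: prefix, so they are straightforward to post-process. A self-contained sketch (a hypothetical helper, not part of ESPnet) that turns one such line into a metrics dict, e.g. for plotting loss_ctc/loss_att/acc over batch windows:

    import re
    from typing import Optional

    # Matches e.g. "11epoch:train:1-100batch: iter_time=1.167, ..."
    LINE_RE = re.compile(r"(?P<epoch>\d+)epoch:train:(?P<batches>[\d-]+)batch: (?P<kv>.*)$")

    def parse_trainer_line(line: str) -> Optional[dict]:
        m = LINE_RE.search(line)
        if m is None:
            return None  # not a per-window trainer stats line
        metrics = {"epoch": int(m.group("epoch")), "batches": m.group("batches")}
        for pair in m.group("kv").split(", "):
            key, _, value = pair.partition("=")
            try:
                metrics[key] = float(value)  # handles 83.800, 2.749e+11, ...
            except ValueError:
                metrics[key] = value
        return metrics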
+[gpub015:0/64] 2023-07-04 13:34:56,174 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub015:0/64] 2023-07-04 13:34:59,472 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"}
+ preprocess: )
+[gpub015:0/64] 2023-07-04 13:34:59,473 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2,
+[gpub015:0/64] 2023-07-04 13:34:59,479 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub015:0/64] 2023-07-04 13:40:59,440 (trainer:732) INFO: 11epoch:train:801-900batch: iter_time=1.311, forward_time=0.142, loss_ctc=85.490, loss_att=68.439, acc=0.664, loss=73.554, backward_time=1.245, grad_norm=86.487, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.178, optim0_lr0=1.173e-04, train_time=8.838
+[gpub015:0/64] 2023-07-04 13:43:36,689 (trainer:732) INFO: 11epoch:train:901-1000batch: iter_time=1.334e-04, forward_time=0.144, loss_ctc=72.168, loss_att=52.441, acc=0.692, loss=58.359, backward_time=1.237, grad_norm=87.029, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.178, optim0_lr0=1.172e-04, train_time=3.145
+[gpub015:0/64] 2023-07-04 13:46:13,656 (trainer:732) INFO: 11epoch:train:1001-1100batch: iter_time=1.298e-04, forward_time=0.144, loss_ctc=71.855, loss_att=63.794, acc=0.650, loss=66.212, backward_time=1.237, grad_norm=91.081, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.178, optim0_lr0=1.172e-04, train_time=3.139
+[gpub015:0/64] 2023-07-04 13:48:50,728 (trainer:732) INFO: 11epoch:train:1101-1200batch: iter_time=1.288e-04, forward_time=0.145, loss_ctc=78.754, loss_att=62.586, acc=0.672, loss=67.436, backward_time=1.237, grad_norm=94.410, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.178, optim0_lr0=1.171e-04, train_time=3.141
+[gpub015:0/64] 2023-07-04 13:51:27,702 (trainer:732) INFO: 11epoch:train:1201-1300batch: iter_time=1.265e-04, forward_time=0.144, loss_ctc=74.458, loss_att=56.373, acc=0.665, loss=61.798, backward_time=1.236, grad_norm=85.002, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.178, optim0_lr0=1.170e-04, train_time=3.139
+[gpub015:0/64] 2023-07-04 13:54:04,338 (trainer:732) INFO: 11epoch:train:1301-1400batch: iter_time=1.328e-04, forward_time=0.144, loss_ctc=74.356, loss_att=61.107, acc=0.633, loss=65.082, backward_time=1.236, grad_norm=96.927, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.178, optim0_lr0=1.170e-04, train_time=3.133
+[gpub015:0/64] 2023-07-04 13:56:40,929 (trainer:732) INFO: 11epoch:train:1401-1500batch: iter_time=1.283e-04, forward_time=0.144, loss_ctc=70.043, loss_att=58.437, acc=0.645, loss=61.919, backward_time=1.235, grad_norm=79.036, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.178, optim0_lr0=1.169e-04, train_time=3.132
+[gpub015:0/64] 2023-07-04 13:59:17,863 (trainer:732) INFO: 11epoch:train:1501-1600batch: iter_time=1.270e-04, forward_time=0.145, loss_ctc=75.925, loss_att=57.323, acc=0.695, loss=62.903, backward_time=1.237, grad_norm=80.385, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.178, optim0_lr0=1.168e-04, train_time=3.138
+[gpub015:0/64] 2023-07-04 14:01:03,303 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub015:0/64] 2023-07-04 14:01:21,149 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub015:0/64] 2023-07-04 14:01:24,530 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"}
+ preprocess: )
+[gpub015:0/64] 2023-07-04 14:01:24,530 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4,
+[gpub015:0/64] 2023-07-04 14:01:24,537 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub015:0/64] 2023-07-04 14:05:06,649 (trainer:732) INFO: 11epoch:train:1601-1700batch: iter_time=1.217, forward_time=0.143, loss_ctc=84.818, loss_att=70.745, acc=0.672, loss=74.967, backward_time=1.246, grad_norm=97.726, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.178, optim0_lr0=1.168e-04, train_time=6.976
+[gpub015:0/64] 2023-07-04 14:07:44,458 (trainer:732) INFO: 11epoch:train:1701-1800batch: iter_time=1.115e-04, forward_time=0.144, loss_ctc=71.343, loss_att=51.236, acc=0.688, loss=57.268, backward_time=1.239, grad_norm=103.277, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.179, optim0_lr0=1.167e-04, train_time=3.156
+[gpub015:0/64] 2023-07-04 14:10:21,624 (trainer:732) INFO: 11epoch:train:1801-1900batch: iter_time=1.042e-04, forward_time=0.143, loss_ctc=73.993, loss_att=60.990, acc=0.670, loss=64.891, backward_time=1.239, grad_norm=98.902, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.179, optim0_lr0=1.167e-04, train_time=3.143
+[gpub015:0/64] 2023-07-04 14:12:58,635 (trainer:732) INFO: 11epoch:train:1901-2000batch: iter_time=1.055e-04, forward_time=0.143, loss_ctc=77.280, loss_att=60.422, acc=0.672, loss=65.479, backward_time=1.239, grad_norm=99.361, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.179, optim0_lr0=1.166e-04, train_time=3.140
+[gpub015:0/64] 2023-07-04 14:15:35,403 (trainer:732) INFO: 11epoch:train:2001-2100batch: iter_time=1.027e-04, forward_time=0.144, loss_ctc=71.434, loss_att=55.009, acc=0.670, loss=59.936, backward_time=1.236, grad_norm=89.022, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.165e-04, train_time=3.135
+[gpub015:0/64] 2023-07-04 14:18:12,306 (trainer:732) INFO: 11epoch:train:2101-2200batch: iter_time=9.694e-05, forward_time=0.143, loss_ctc=80.484, loss_att=62.826, acc=0.656, loss=68.123, backward_time=1.238, grad_norm=91.242, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.165e-04, train_time=3.138
+[gpub015:0/64] 2023-07-04 14:20:48,909 (trainer:732) INFO: 11epoch:train:2201-2300batch: iter_time=1.006e-04, forward_time=0.142, loss_ctc=66.251, loss_att=54.235, acc=0.643, loss=57.840, backward_time=1.236, grad_norm=75.648, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.164e-04, train_time=3.132
+[gpub015:0/64] 2023-07-04 14:23:25,954 (trainer:732) INFO: 11epoch:train:2301-2400batch: iter_time=9.957e-05, forward_time=0.143, loss_ctc=76.969, loss_att=61.945, acc=0.675, loss=66.452, backward_time=1.239, grad_norm=86.558, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.163e-04, train_time=3.141
+[gpub015:0/64] 2023-07-04 14:26:02,381 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub015:0/64] 2023-07-04 14:26:20,628 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub015:0/64] 2023-07-04 14:26:24,038 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"}
+ preprocess: )
+[gpub015:0/64] 2023-07-04 14:26:24,038 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0,
+[gpub015:0/64] 2023-07-04 14:26:24,044 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub015:0/64] 2023-07-04 14:29:27,606 (trainer:732) INFO: 11epoch:train:2401-2500batch: iter_time=1.234, forward_time=0.142, loss_ctc=83.156, loss_att=65.525, acc=0.675, loss=70.814, backward_time=1.245, grad_norm=87.609, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.163e-04, train_time=7.233
+[gpub015:0/64] 2023-07-04 14:32:05,977 (trainer:732) INFO: 11epoch:train:2501-2600batch: iter_time=9.809e-05, forward_time=0.143, loss_ctc=78.756, loss_att=58.058, acc=0.689, loss=64.267, backward_time=1.245, grad_norm=89.189, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.162e-04, train_time=3.167
+[gpub015:0/64] 2023-07-04 14:34:43,361 (trainer:732) INFO: 11epoch:train:2601-2700batch: iter_time=8.955e-05, forward_time=0.141, loss_ctc=73.761, loss_att=55.772, acc=0.686, loss=61.169, backward_time=1.240, grad_norm=84.815, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.162e-04, train_time=3.147
+[gpub015:0/64] 2023-07-04 14:37:20,015 (trainer:732) INFO: 11epoch:train:2701-2800batch: iter_time=1.035e-04, forward_time=0.142, loss_ctc=73.778, loss_att=62.326, acc=0.658, loss=65.762, backward_time=1.236, grad_norm=88.821, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.161e-04, train_time=3.133
+[gpub015:0/64] 2023-07-04 14:39:56,726 (trainer:732) INFO: 11epoch:train:2801-2900batch: iter_time=1.067e-04, forward_time=0.142, loss_ctc=77.384, loss_att=58.538, acc=0.690, loss=64.192, backward_time=1.237, grad_norm=82.802, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.160e-04, train_time=3.134
+[gpub015:0/64] 2023-07-04 14:42:33,340 (trainer:732) INFO: 11epoch:train:2901-3000batch: iter_time=1.122e-04, forward_time=0.142, loss_ctc=70.876, loss_att=53.471, acc=0.667, loss=58.692, backward_time=1.236, grad_norm=78.599, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.160e-04, train_time=3.132
+[gpub015:0/64] 2023-07-04 14:45:10,899 (trainer:732) INFO: 11epoch:train:3001-3100batch: iter_time=1.203e-04, forward_time=0.142, loss_ctc=71.915, loss_att=58.489, acc=0.639, loss=62.517, backward_time=1.238, grad_norm=79.638, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.159e-04, train_time=3.151
+[gpub015:0/64] 2023-07-04 14:47:55,543 (trainer:732) INFO: 11epoch:train:3101-3200batch: iter_time=1.053e-04, forward_time=0.143, loss_ctc=73.017, loss_att=63.582, acc=0.657, loss=66.413, backward_time=1.249, grad_norm=104.209, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.158e-04, train_time=3.293
+[gpub015:0/64] 2023-07-04 14:50:39,507 (trainer:732) INFO: 11epoch:train:3201-3300batch: iter_time=1.197e-04, forward_time=0.143, loss_ctc=79.855, loss_att=60.339, acc=0.696, loss=66.194, backward_time=1.242, grad_norm=90.048, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.158e-04, train_time=3.279
+[gpub015:0/64] 2023-07-04 14:51:31,262 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub015:0/64] 2023-07-04 14:51:49,250 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub015:0/64] 2023-07-04 14:51:52,772 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"}
+ preprocess: )
+[gpub015:0/64] 2023-07-04 14:51:52,773 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8,
+[gpub015:0/64] 2023-07-04 14:51:52,779 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129
+[gpub015:0/64] 2023-07-04 14:57:01,433 (trainer:732) INFO: 11epoch:train:3301-3400batch: iter_time=1.215, forward_time=0.143, loss_ctc=83.729, loss_att=64.933, acc=0.671, loss=70.572, backward_time=1.246, grad_norm=93.661, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.157e-04, train_time=7.638
+[gpub015:0/64] 2023-07-04 14:59:38,410 (trainer:732) INFO: 11epoch:train:3401-3500batch: iter_time=1.059e-04, forward_time=0.142, loss_ctc=72.534, loss_att=53.184, acc=0.696, loss=58.989, backward_time=1.236, grad_norm=82.069, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.157e-04, train_time=3.139
+[gpub015:0/64] 2023-07-04 15:02:15,990 (trainer:732) INFO: 11epoch:train:3501-3600batch: iter_time=1.292e-04, forward_time=0.142, loss_ctc=72.125, loss_att=60.773, acc=0.657, loss=64.179, backward_time=1.242, grad_norm=79.186, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.156e-04, train_time=3.151
+[gpub015:0/64] 2023-07-04 15:04:52,996 (trainer:732) INFO: 11epoch:train:3601-3700batch: iter_time=1.228e-04, forward_time=0.144, loss_ctc=76.260, loss_att=59.534, acc=0.681, loss=64.551, backward_time=1.239, grad_norm=93.112, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.155e-04, train_time=3.140
+[gpub015:0/64] 2023-07-04 15:07:29,645 (trainer:732) INFO: 11epoch:train:3701-3800batch: iter_time=1.134e-04, forward_time=0.143, loss_ctc=75.428, loss_att=54.548, acc=0.673, loss=60.812, backward_time=1.236, grad_norm=79.497, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.155e-04, train_time=3.133
+[gpub015:0/64] 2023-07-04 15:10:06,182 (trainer:732) INFO: 11epoch:train:3801-3900batch: iter_time=1.151e-04, forward_time=0.142, loss_ctc=71.937, loss_att=59.873, acc=0.630, loss=63.492, backward_time=1.235, grad_norm=89.457, clip=100.000, loss_scale=5.498e+11,
optim_step_time=0.178, optim0_lr0=1.154e-04, train_time=3.131 +[gpub015:0/64] 2023-07-04 15:12:43,022 (trainer:732) INFO: 11epoch:train:3901-4000batch: iter_time=1.081e-04, forward_time=0.143, loss_ctc=71.573, loss_att=63.440, acc=0.652, loss=65.880, backward_time=1.237, grad_norm=79.058, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.153e-04, train_time=3.137 +[gpub015:0/64] 2023-07-04 15:15:19,772 (trainer:732) INFO: 11epoch:train:4001-4100batch: iter_time=1.220e-04, forward_time=0.143, loss_ctc=74.521, loss_att=56.145, acc=0.691, loss=61.658, backward_time=1.237, grad_norm=80.871, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.179, optim0_lr0=1.153e-04, train_time=3.135 +[gpub015:0/64] 2023-07-04 15:17:04,683 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub015:0/64] 2023-07-04 15:17:22,602 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-04 15:17:26,004 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-04 15:17:26,005 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub015:0/64] 2023-07-04 15:17:26,011 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-04 15:21:23,202 (trainer:732) INFO: 11epoch:train:4101-4200batch: iter_time=1.199, forward_time=0.143, loss_ctc=81.712, loss_att=65.217, acc=0.675, loss=70.165, backward_time=1.247, grad_norm=87.365, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.179, optim0_lr0=1.152e-04, train_time=7.268 +[gpub015:0/64] 2023-07-04 15:24:00,969 (trainer:732) INFO: 11epoch:train:4201-4300batch: iter_time=1.045e-04, forward_time=0.144, loss_ctc=72.537, loss_att=51.252, acc=0.700, loss=57.638, backward_time=1.240, grad_norm=85.262, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.152e-04, train_time=3.155 +[gpub015:0/64] 2023-07-04 15:26:37,814 (trainer:732) INFO: 11epoch:train:4301-4400batch: iter_time=1.208e-04, forward_time=0.142, loss_ctc=71.997, loss_att=59.331, acc=0.679, loss=63.131, backward_time=1.238, grad_norm=103.945, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.151e-04, train_time=3.137 +[gpub015:0/64] 2023-07-04 15:29:14,481 (trainer:732) INFO: 11epoch:train:4401-4500batch: iter_time=1.185e-04, forward_time=0.143, loss_ctc=78.545, loss_att=61.794, acc=0.684, loss=66.819, backward_time=1.237, grad_norm=96.020, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.150e-04, train_time=3.133 +[gpub015:0/64] 2023-07-04 15:31:51,324 (trainer:732) INFO: 11epoch:train:4501-4600batch: iter_time=1.079e-04, forward_time=0.144, loss_ctc=72.669, loss_att=54.380, acc=0.683, loss=59.867, backward_time=1.236, grad_norm=94.169, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.150e-04, train_time=3.137 +[gpub015:0/64] 2023-07-04 15:34:28,101 (trainer:732) INFO: 11epoch:train:4601-4700batch: iter_time=1.150e-04, forward_time=0.142, 
loss_ctc=78.958, loss_att=61.475, acc=0.651, loss=66.720, backward_time=1.237, grad_norm=82.362, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.149e-04, train_time=3.135 +[gpub015:0/64] 2023-07-04 15:37:05,127 (trainer:732) INFO: 11epoch:train:4701-4800batch: iter_time=1.123e-04, forward_time=0.144, loss_ctc=67.006, loss_att=56.681, acc=0.658, loss=59.779, backward_time=1.237, grad_norm=76.677, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.149e-04, train_time=3.140 +[gpub015:0/64] 2023-07-04 15:39:42,072 (trainer:732) INFO: 11epoch:train:4801-4900batch: iter_time=1.144e-04, forward_time=0.142, loss_ctc=76.017, loss_att=60.305, acc=0.690, loss=65.018, backward_time=1.238, grad_norm=93.325, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.148e-04, train_time=3.139 +[gpub015:0/64] 2023-07-04 15:42:18,923 (trainer:732) INFO: 11epoch:train:4901-5000batch: iter_time=1.163e-04, forward_time=0.143, loss_ctc=80.694, loss_att=63.853, acc=0.683, loss=68.905, backward_time=1.239, grad_norm=94.593, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.147e-04, train_time=3.137 +[gpub015:0/64] 2023-07-04 15:42:20,304 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub015:0/64] 2023-07-04 15:42:38,424 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-04 15:42:41,782 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-04 15:42:41,782 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub015:0/64] 2023-07-04 15:42:41,788 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-04 15:48:19,632 (trainer:732) INFO: 11epoch:train:5001-5100batch: iter_time=1.237, forward_time=0.144, loss_ctc=78.222, loss_att=59.300, acc=0.679, loss=64.976, backward_time=1.247, grad_norm=95.238, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.147e-04, train_time=7.214 +[gpub015:0/64] 2023-07-04 15:50:56,912 (trainer:732) INFO: 11epoch:train:5101-5200batch: iter_time=1.348e-04, forward_time=0.142, loss_ctc=70.037, loss_att=54.648, acc=0.687, loss=59.265, backward_time=1.236, grad_norm=85.950, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.146e-04, train_time=3.145 +[gpub015:0/64] 2023-07-04 15:53:33,723 (trainer:732) INFO: 11epoch:train:5201-5300batch: iter_time=1.410e-04, forward_time=0.143, loss_ctc=74.060, loss_att=61.954, acc=0.662, loss=65.585, backward_time=1.236, grad_norm=92.304, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.146e-04, train_time=3.136 +[gpub015:0/64] 2023-07-04 15:56:10,431 (trainer:732) INFO: 11epoch:train:5301-5400batch: iter_time=1.411e-04, forward_time=0.144, loss_ctc=74.817, loss_att=56.868, acc=0.686, loss=62.253, backward_time=1.235, grad_norm=79.959, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.145e-04, train_time=3.134 
+[gpub015:0/64] 2023-07-04 15:58:47,203 (trainer:732) INFO: 11epoch:train:5401-5500batch: iter_time=1.433e-04, forward_time=0.144, loss_ctc=73.246, loss_att=57.581, acc=0.663, loss=62.281, backward_time=1.236, grad_norm=89.297, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.144e-04, train_time=3.135 +[gpub015:0/64] 2023-07-04 16:01:23,856 (trainer:732) INFO: 11epoch:train:5501-5600batch: iter_time=1.234e-04, forward_time=0.143, loss_ctc=69.266, loss_att=52.780, acc=0.658, loss=57.726, backward_time=1.235, grad_norm=84.838, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.144e-04, train_time=3.133 +[gpub015:0/64] 2023-07-04 16:04:00,841 (trainer:732) INFO: 11epoch:train:5601-5700batch: iter_time=1.416e-04, forward_time=0.144, loss_ctc=70.474, loss_att=61.191, acc=0.661, loss=63.976, backward_time=1.238, grad_norm=85.157, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.143e-04, train_time=3.139 +[gpub015:0/64] 2023-07-04 16:06:37,726 (trainer:732) INFO: 11epoch:train:5701-5800batch: iter_time=1.304e-04, forward_time=0.143, loss_ctc=83.723, loss_att=67.490, acc=0.687, loss=72.360, backward_time=1.237, grad_norm=82.489, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.143e-04, train_time=3.137 +[gpub015:0/64] 2023-07-04 16:07:32,415 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub015:0/64] 2023-07-04 16:07:50,213 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-04 16:07:53,634 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-04 16:07:53,634 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub015:0/64] 2023-07-04 16:07:53,641 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-04 16:12:26,124 (trainer:732) INFO: 11epoch:train:5801-5900batch: iter_time=1.216, forward_time=0.142, loss_ctc=78.153, loss_att=58.359, acc=0.683, loss=64.297, backward_time=1.245, grad_norm=81.488, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.142e-04, train_time=6.968 +[gpub015:0/64] 2023-07-04 16:15:03,120 (trainer:732) INFO: 11epoch:train:5901-6000batch: iter_time=1.015e-04, forward_time=0.141, loss_ctc=75.076, loss_att=52.764, acc=0.698, loss=59.457, backward_time=1.236, grad_norm=87.247, clip=100.000, loss_scale=5.498e+11, optim_step_time=0.178, optim0_lr0=1.141e-04, train_time=3.140 +[gpub015:0/64] 2023-07-04 16:17:40,160 (trainer:732) INFO: 11epoch:train:6001-6100batch: iter_time=1.041e-04, forward_time=0.142, loss_ctc=72.470, loss_att=62.178, acc=0.673, loss=65.265, backward_time=1.237, grad_norm=83.009, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.179, optim0_lr0=1.141e-04, train_time=3.141 +[gpub015:0/64] 2023-07-04 16:20:17,104 (trainer:732) INFO: 11epoch:train:6101-6200batch: iter_time=1.004e-04, forward_time=0.144, loss_ctc=74.977, loss_att=58.241, acc=0.691, loss=63.262, backward_time=1.238, 
grad_norm=88.495, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.140e-04, train_time=3.139 +[gpub015:0/64] 2023-07-04 16:22:54,294 (trainer:732) INFO: 11epoch:train:6201-6300batch: iter_time=1.117e-04, forward_time=0.142, loss_ctc=73.711, loss_att=55.176, acc=0.683, loss=60.737, backward_time=1.237, grad_norm=84.983, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.140e-04, train_time=3.144 +[gpub015:0/64] 2023-07-04 16:25:30,906 (trainer:732) INFO: 11epoch:train:6301-6400batch: iter_time=1.066e-04, forward_time=0.142, loss_ctc=75.193, loss_att=58.962, acc=0.652, loss=63.832, backward_time=1.237, grad_norm=87.664, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.179, optim0_lr0=1.139e-04, train_time=3.132 +[gpub015:0/64] 2023-07-04 16:28:07,562 (trainer:732) INFO: 11epoch:train:6401-6500batch: iter_time=1.093e-04, forward_time=0.142, loss_ctc=67.375, loss_att=55.362, acc=0.673, loss=58.966, backward_time=1.236, grad_norm=87.593, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.179, optim0_lr0=1.138e-04, train_time=3.133 +[gpub015:0/64] 2023-07-04 16:30:44,706 (trainer:732) INFO: 11epoch:train:6501-6600batch: iter_time=1.079e-04, forward_time=0.144, loss_ctc=74.061, loss_att=54.804, acc=0.709, loss=60.581, backward_time=1.238, grad_norm=79.167, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.138e-04, train_time=3.143 +[gpub015:0/64] 2023-07-04 16:32:33,226 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub015:0/64] 2023-07-04 16:32:51,116 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-04 16:32:54,503 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-04 16:32:54,503 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub015:0/64] 2023-07-04 16:32:54,509 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-04 16:36:49,987 (trainer:732) INFO: 11epoch:train:6601-6700batch: iter_time=1.193, forward_time=0.144, loss_ctc=83.775, loss_att=70.165, acc=0.684, loss=74.248, backward_time=1.261, grad_norm=89.442, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.179, optim0_lr0=1.137e-04, train_time=7.305 +[gpub015:0/64] 2023-07-04 16:39:27,770 (trainer:732) INFO: 11epoch:train:6701-6800batch: iter_time=1.102e-04, forward_time=0.142, loss_ctc=72.289, loss_att=51.250, acc=0.688, loss=57.562, backward_time=1.238, grad_norm=89.186, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.137e-04, train_time=3.155 +[gpub015:0/64] 2023-07-04 16:42:09,188 (trainer:732) INFO: 11epoch:train:6801-6900batch: iter_time=1.029e-04, forward_time=0.143, loss_ctc=73.752, loss_att=61.099, acc=0.672, loss=64.895, backward_time=1.240, grad_norm=83.648, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.136e-04, train_time=3.228 +[gpub015:0/64] 2023-07-04 16:44:51,998 (trainer:732) INFO: 11epoch:train:6901-7000batch: 
iter_time=1.079e-04, forward_time=0.150, loss_ctc=74.119, loss_att=58.766, acc=0.679, loss=63.372, backward_time=1.244, grad_norm=117.258, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.177, optim0_lr0=1.135e-04, train_time=3.256 +[gpub015:0/64] 2023-07-04 16:47:37,040 (trainer:732) INFO: 11epoch:train:7001-7100batch: iter_time=1.079e-04, forward_time=0.144, loss_ctc=70.548, loss_att=54.290, acc=0.676, loss=59.167, backward_time=1.239, grad_norm=82.573, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.177, optim0_lr0=1.135e-04, train_time=3.301 +[gpub015:0/64] 2023-07-04 16:50:15,687 (trainer:732) INFO: 11epoch:train:7101-7200batch: iter_time=1.151e-04, forward_time=0.146, loss_ctc=77.779, loss_att=60.926, acc=0.663, loss=65.982, backward_time=1.237, grad_norm=82.395, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.177, optim0_lr0=1.134e-04, train_time=3.173 +[gpub015:0/64] 2023-07-04 16:52:52,296 (trainer:732) INFO: 11epoch:train:7201-7300batch: iter_time=1.123e-04, forward_time=0.141, loss_ctc=68.907, loss_att=54.263, acc=0.649, loss=58.656, backward_time=1.235, grad_norm=81.090, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.134e-04, train_time=3.132 +[gpub015:0/64] 2023-07-04 16:55:29,304 (trainer:732) INFO: 11epoch:train:7301-7400batch: iter_time=1.139e-04, forward_time=0.143, loss_ctc=73.895, loss_att=61.349, acc=0.678, loss=65.113, backward_time=1.238, grad_norm=86.829, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.133e-04, train_time=3.140 +[gpub015:0/64] 2023-07-04 16:58:06,207 (trainer:732) INFO: 11epoch:train:7401-7500batch: iter_time=1.001e-04, forward_time=0.142, loss_ctc=80.322, loss_att=63.802, acc=0.685, loss=68.758, backward_time=1.236, grad_norm=94.311, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.133e-04, train_time=3.138 +[gpub015:0/64] 2023-07-04 16:58:11,666 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub015:0/64] 2023-07-04 16:58:29,313 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-04 16:58:32,661 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-04 16:58:32,661 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub015:0/64] 2023-07-04 16:58:32,667 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-04 17:05:39,174 (trainer:732) INFO: 11epoch:train:7501-7600batch: iter_time=1.528, forward_time=0.144, loss_ctc=78.582, loss_att=59.317, acc=0.684, loss=65.097, backward_time=1.252, grad_norm=88.598, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.132e-04, train_time=9.059 +[gpub015:0/64] 2023-07-04 17:08:17,107 (trainer:732) INFO: 11epoch:train:7601-7700batch: iter_time=1.000e-04, forward_time=0.143, loss_ctc=69.421, loss_att=54.039, acc=0.703, loss=58.653, backward_time=1.241, grad_norm=91.904, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.131e-04, train_time=3.158 +[gpub015:0/64] 2023-07-04 17:10:54,037 (trainer:732) INFO: 11epoch:train:7701-7800batch: iter_time=1.044e-04, forward_time=0.142, loss_ctc=72.978, loss_att=61.569, acc=0.673, loss=64.992, backward_time=1.239, grad_norm=94.885, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.131e-04, train_time=3.138 +[gpub015:0/64] 2023-07-04 17:13:31,101 (trainer:732) INFO: 11epoch:train:7801-7900batch: iter_time=9.732e-05, forward_time=0.143, loss_ctc=75.174, loss_att=54.680, acc=0.694, loss=60.828, backward_time=1.239, grad_norm=80.141, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.130e-04, train_time=3.141 +[gpub015:0/64] 2023-07-04 17:16:07,930 (trainer:732) INFO: 11epoch:train:7901-8000batch: iter_time=1.003e-04, forward_time=0.143, loss_ctc=72.983, loss_att=56.893, acc=0.676, loss=61.720, backward_time=1.238, grad_norm=83.576, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.130e-04, train_time=3.136 +[gpub015:0/64] 2023-07-04 17:18:44,592 (trainer:732) INFO: 11epoch:train:8001-8100batch: iter_time=1.019e-04, forward_time=0.143, loss_ctc=71.290, loss_att=54.002, acc=0.666, loss=59.188, backward_time=1.237, grad_norm=104.543, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.129e-04, train_time=3.133 +[gpub015:0/64] 2023-07-04 17:21:21,553 (trainer:732) INFO: 11epoch:train:8101-8200batch: iter_time=1.072e-04, forward_time=0.143, loss_ctc=71.502, loss_att=61.136, acc=0.676, loss=64.245, backward_time=1.238, grad_norm=79.194, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.129e-04, train_time=3.139 +[gpub015:0/64] 2023-07-04 17:23:58,799 (trainer:732) INFO: 11epoch:train:8201-8300batch: iter_time=9.818e-05, forward_time=0.144, loss_ctc=81.675, loss_att=65.649, acc=0.699, loss=70.457, backward_time=1.241, grad_norm=95.853, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, 
optim0_lr0=1.128e-04, train_time=3.145 +[gpub015:0/64] 2023-07-04 17:24:53,534 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub015:0/64] 2023-07-04 17:25:11,166 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-04 17:25:14,530 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-04 17:25:14,530 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub015:0/64] 2023-07-04 17:25:14,536 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-04 17:29:58,326 (trainer:732) INFO: 11epoch:train:8301-8400batch: iter_time=1.239, forward_time=0.142, loss_ctc=77.508, loss_att=57.390, acc=0.690, loss=63.426, backward_time=1.251, grad_norm=84.822, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.179, optim0_lr0=1.127e-04, train_time=7.190 +[gpub015:0/64] 2023-07-04 17:32:39,866 (trainer:732) INFO: 11epoch:train:8401-8500batch: iter_time=1.145e-04, forward_time=0.143, loss_ctc=71.150, loss_att=51.139, acc=0.704, loss=57.143, backward_time=1.244, grad_norm=74.997, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.127e-04, train_time=3.231 +[gpub015:0/64] 2023-07-04 17:35:18,227 (trainer:732) INFO: 11epoch:train:8501-8600batch: iter_time=1.087e-04, forward_time=0.142, loss_ctc=70.281, loss_att=61.581, acc=0.677, loss=64.191, backward_time=1.238, grad_norm=88.779, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.126e-04, train_time=3.167 +[gpub015:0/64] 2023-07-04 17:38:03,061 (trainer:732) INFO: 11epoch:train:8601-8700batch: iter_time=0.002, forward_time=0.166, loss_ctc=74.328, loss_att=57.549, acc=0.693, loss=62.583, backward_time=1.255, grad_norm=120.714, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.126e-04, train_time=3.296 +[gpub015:0/64] 2023-07-04 17:40:40,597 (trainer:732) INFO: 11epoch:train:8701-8800batch: iter_time=1.146e-04, forward_time=0.144, loss_ctc=72.996, loss_att=53.756, acc=0.687, loss=59.528, backward_time=1.239, grad_norm=77.639, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.125e-04, train_time=3.150 +[gpub015:0/64] 2023-07-04 17:43:18,170 (trainer:732) INFO: 11epoch:train:8801-8900batch: iter_time=1.120e-04, forward_time=0.145, loss_ctc=72.990, loss_att=58.467, acc=0.657, loss=62.824, backward_time=1.238, grad_norm=79.134, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.183, optim0_lr0=1.125e-04, train_time=3.151 +[gpub015:0/64] 2023-07-04 17:46:06,875 (trainer:732) INFO: 11epoch:train:8901-9000batch: iter_time=1.090e-04, forward_time=0.179, loss_ctc=67.241, loss_att=54.706, acc=0.676, loss=58.467, backward_time=1.250, grad_norm=75.286, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.181, optim0_lr0=1.124e-04, train_time=3.374 +[gpub015:0/64] 2023-07-04 17:48:51,294 (trainer:732) INFO: 11epoch:train:9001-9100batch: iter_time=1.047e-04, forward_time=0.143, loss_ctc=75.541, loss_att=55.910, 
acc=0.706, loss=61.799, backward_time=1.246, grad_norm=79.268, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.179, optim0_lr0=1.123e-04, train_time=3.288 +[gpub015:0/64] 2023-07-04 17:50:45,067 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub015:0/64] 2023-07-04 17:51:02,822 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-04 17:51:06,236 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-04 17:51:06,236 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub015:0/64] 2023-07-04 17:51:06,242 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-04 17:55:34,613 (trainer:732) INFO: 11epoch:train:9101-9200batch: iter_time=1.493, forward_time=0.145, loss_ctc=84.206, loss_att=70.331, acc=0.687, loss=74.493, backward_time=1.252, grad_norm=92.347, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.179, optim0_lr0=1.123e-04, train_time=8.066 +[gpub015:0/64] 2023-07-04 17:58:14,578 (trainer:732) INFO: 11epoch:train:9201-9300batch: iter_time=1.225e-04, forward_time=0.144, loss_ctc=72.015, loss_att=51.510, acc=0.694, loss=57.662, backward_time=1.240, grad_norm=101.263, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.179, optim0_lr0=1.122e-04, train_time=3.199 +[gpub015:0/64] 2023-07-04 18:00:53,333 (trainer:732) INFO: 11epoch:train:9301-9400batch: iter_time=1.103e-04, forward_time=0.148, loss_ctc=71.528, loss_att=60.448, acc=0.694, loss=63.772, backward_time=1.241, grad_norm=82.338, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.179, optim0_lr0=1.122e-04, train_time=3.175 +[gpub015:0/64] 2023-07-04 18:03:34,243 (trainer:732) INFO: 11epoch:train:9401-9500batch: iter_time=1.096e-04, forward_time=0.152, loss_ctc=73.079, loss_att=58.605, acc=0.691, loss=62.947, backward_time=1.240, grad_norm=88.620, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.179, optim0_lr0=1.121e-04, train_time=3.218 +[gpub015:0/64] 2023-07-04 18:06:11,084 (trainer:732) INFO: 11epoch:train:9501-9600batch: iter_time=1.217e-04, forward_time=0.144, loss_ctc=69.793, loss_att=52.272, acc=0.687, loss=57.529, backward_time=1.237, grad_norm=86.909, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.121e-04, train_time=3.137 +[gpub015:0/64] 2023-07-04 18:08:48,008 (trainer:732) INFO: 11epoch:train:9601-9700batch: iter_time=1.100e-04, forward_time=0.143, loss_ctc=77.129, loss_att=60.450, acc=0.676, loss=65.454, backward_time=1.238, grad_norm=89.294, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.120e-04, train_time=3.138 +[gpub015:0/64] 2023-07-04 18:11:24,516 (trainer:732) INFO: 11epoch:train:9701-9800batch: iter_time=1.087e-04, forward_time=0.142, loss_ctc=66.975, loss_att=52.929, acc=0.666, loss=57.143, backward_time=1.236, grad_norm=82.429, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.119e-04, train_time=3.130 +[gpub015:0/64] 2023-07-04 18:14:01,398 
(trainer:732) INFO: 11epoch:train:9801-9900batch: iter_time=1.081e-04, forward_time=0.143, loss_ctc=74.627, loss_att=61.438, acc=0.686, loss=65.395, backward_time=1.237, grad_norm=80.146, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.119e-04, train_time=3.137 +[gpub015:0/64] 2023-07-04 18:16:38,364 (trainer:732) INFO: 11epoch:train:9901-10000batch: iter_time=1.095e-04, forward_time=0.144, loss_ctc=80.252, loss_att=63.991, acc=0.691, loss=68.869, backward_time=1.238, grad_norm=89.399, clip=100.000, loss_scale=1.100e+12, optim_step_time=0.178, optim0_lr0=1.118e-04, train_time=3.139 +[gpub015:0/64] 2023-07-04 18:29:09,297 (trainer:338) INFO: 11epoch results: [train] iter_time=0.153, forward_time=0.145, loss_ctc=74.807, loss_att=58.923, acc=0.676, loss=63.688, backward_time=1.240, grad_norm=88.098, clip=100.000, loss_scale=7.147e+11, optim_step_time=0.178, optim0_lr0=1.147e-04, train_time=3.683, time=5 hours, 7 minutes and 6.9 seconds, total_count=80000, gpu_max_cached_mem_GB=33.838, [valid] loss_ctc=61.632, cer_ctc=0.324, loss_att=49.266, acc=0.614, cer=0.445, wer=1.000, loss=52.976, time=6 minutes and 24.4 seconds, total_count=8602, gpu_max_cached_mem_GB=37.133, [att_plot] time=5 minutes and 54.75 seconds, total_count=0, gpu_max_cached_mem_GB=37.133 +[gpub015:0/64] 2023-07-04 18:29:26,715 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub015:0/64] 2023-07-04 18:29:26,720 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/6epoch.pth +[gpub015:0/64] 2023-07-04 18:29:26,777 (trainer:272) INFO: 12/100epoch started. Estimated time to finish: 2 weeks, 5 days and 18 hours +[gpub015:0/64] 2023-07-04 18:29:28,074 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub015:0/64] 2023-07-04 18:29:45,861 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-04 18:29:49,136 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-04 18:29:49,136 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub015:0/64] 2023-07-04 18:29:49,212 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-04 18:35:37,004 (trainer:732) INFO: 12epoch:train:1-100batch: iter_time=2.064, forward_time=0.171, loss_ctc=84.536, loss_att=72.238, acc=0.649, loss=75.927, backward_time=1.250, grad_norm=121.129, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.182, optim0_lr0=1.118e-04, train_time=7.389 +[gpub015:0/64] 2023-07-04 18:38:17,593 (trainer:732) INFO: 12epoch:train:101-200batch: iter_time=1.095e-04, forward_time=0.144, loss_ctc=73.120, loss_att=54.091, acc=0.688, loss=59.800, backward_time=1.248, grad_norm=107.383, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.117e-04, train_time=3.212 +[gpub015:0/64] 2023-07-04 18:41:05,153 (trainer:732) INFO: 12epoch:train:201-300batch: iter_time=1.067e-04, forward_time=0.145, loss_ctc=70.845, loss_att=60.210, acc=0.659, loss=63.401, backward_time=1.249, grad_norm=88.294, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.117e-04, train_time=3.351 +[gpub015:0/64] 2023-07-04 18:43:44,855 (trainer:732) INFO: 12epoch:train:301-400batch: iter_time=1.144e-04, forward_time=0.144, loss_ctc=85.017, loss_att=69.739, acc=0.635, loss=74.322, backward_time=1.244, grad_norm=98.262, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.116e-04, train_time=3.194 +[gpub015:0/64] 2023-07-04 18:46:25,802 (trainer:732) INFO: 12epoch:train:401-500batch: iter_time=1.111e-04, forward_time=0.144, loss_ctc=74.240, loss_att=58.589, acc=0.662, loss=63.284, backward_time=1.244, grad_norm=86.501, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.182, optim0_lr0=1.116e-04, train_time=3.219 +[gpub015:0/64] 2023-07-04 18:49:05,301 (trainer:732) INFO: 12epoch:train:501-600batch: iter_time=1.124e-04, forward_time=0.145, loss_ctc=76.668, loss_att=63.744, acc=0.656, loss=67.622, backward_time=1.245, grad_norm=100.634, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.115e-04, train_time=3.190 +[gpub015:0/64] 2023-07-04 18:51:45,925 (trainer:732) INFO: 12epoch:train:601-700batch: iter_time=1.142e-04, forward_time=0.144, loss_ctc=81.493, loss_att=63.434, acc=0.662, loss=68.852, backward_time=1.243, grad_norm=121.231, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.114e-04, train_time=3.212 +[gpub015:0/64] 2023-07-04 18:54:26,636 (trainer:732) INFO: 12epoch:train:701-800batch: iter_time=1.127e-04, forward_time=0.145, loss_ctc=80.088, loss_att=59.983, acc=0.669, loss=66.014, backward_time=1.245, grad_norm=96.921, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.114e-04, train_time=3.214 
+[gpub015:0/64] 2023-07-04 18:55:23,266 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub015:0/64] 2023-07-04 18:55:40,570 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-04 18:55:43,896 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-04 18:55:43,896 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub015:0/64] 2023-07-04 18:55:43,902 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-04 18:59:47,620 (trainer:732) INFO: 12epoch:train:801-900batch: iter_time=1.289, forward_time=0.166, loss_ctc=86.833, loss_att=71.102, acc=0.669, loss=75.821, backward_time=1.252, grad_norm=107.388, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.182, optim0_lr0=1.113e-04, train_time=6.419 +[gpub015:0/64] 2023-07-04 19:02:25,633 (trainer:732) INFO: 12epoch:train:901-1000batch: iter_time=1.181e-04, forward_time=0.146, loss_ctc=74.625, loss_att=53.239, acc=0.692, loss=59.655, backward_time=1.243, grad_norm=84.263, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.113e-04, train_time=3.160 +[gpub015:0/64] 2023-07-04 19:05:02,569 (trainer:732) INFO: 12epoch:train:1001-1100batch: iter_time=1.213e-04, forward_time=0.145, loss_ctc=62.727, loss_att=49.968, acc=0.684, loss=53.796, backward_time=1.239, grad_norm=147.092, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.112e-04, train_time=3.138 +[gpub015:0/64] 2023-07-04 19:07:40,033 (trainer:732) INFO: 12epoch:train:1101-1200batch: iter_time=1.236e-04, forward_time=0.145, loss_ctc=85.676, loss_att=74.606, acc=0.647, loss=77.927, backward_time=1.242, grad_norm=97.002, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.112e-04, train_time=3.149 +[gpub015:0/64] 2023-07-04 19:10:17,138 (trainer:732) INFO: 12epoch:train:1201-1300batch: iter_time=1.088e-04, forward_time=0.144, loss_ctc=75.522, loss_att=58.402, acc=0.663, loss=63.538, backward_time=1.241, grad_norm=92.267, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.111e-04, train_time=3.142 +[gpub015:0/64] 2023-07-04 19:12:54,130 (trainer:732) INFO: 12epoch:train:1301-1400batch: iter_time=1.219e-04, forward_time=0.144, loss_ctc=72.477, loss_att=61.256, acc=0.664, loss=64.622, backward_time=1.241, grad_norm=85.313, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.111e-04, train_time=3.140 +[gpub015:0/64] 2023-07-04 19:15:31,210 (trainer:732) INFO: 12epoch:train:1401-1500batch: iter_time=1.172e-04, forward_time=0.144, loss_ctc=81.927, loss_att=61.508, acc=0.682, loss=67.634, backward_time=1.242, grad_norm=94.138, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.110e-04, train_time=3.141 +[gpub015:0/64] 2023-07-04 19:18:08,327 (trainer:732) INFO: 12epoch:train:1501-1600batch: iter_time=1.168e-04, forward_time=0.144, loss_ctc=79.107, loss_att=62.684, acc=0.673, loss=67.611, 
backward_time=1.242, grad_norm=109.704, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.109e-04, train_time=3.142 +[gpub015:0/64] 2023-07-04 19:20:01,909 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub015:0/64] 2023-07-04 19:20:19,627 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-04 19:20:23,028 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-04 19:20:23,028 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub015:0/64] 2023-07-04 19:20:23,034 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-04 19:23:59,707 (trainer:732) INFO: 12epoch:train:1601-1700batch: iter_time=1.736, forward_time=0.146, loss_ctc=86.653, loss_att=68.901, acc=0.661, loss=74.226, backward_time=1.254, grad_norm=89.859, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.109e-04, train_time=7.027 +[gpub015:0/64] 2023-07-04 19:26:37,403 (trainer:732) INFO: 12epoch:train:1701-1800batch: iter_time=1.135e-04, forward_time=0.147, loss_ctc=77.426, loss_att=60.332, acc=0.688, loss=65.461, backward_time=1.245, grad_norm=85.067, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.108e-04, train_time=3.154 +[gpub015:0/64] 2023-07-04 19:29:14,527 (trainer:732) INFO: 12epoch:train:1801-1900batch: iter_time=1.164e-04, forward_time=0.146, loss_ctc=64.854, loss_att=49.130, acc=0.702, loss=53.847, backward_time=1.242, grad_norm=88.422, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.182, optim0_lr0=1.108e-04, train_time=3.142 +[gpub015:0/64] 2023-07-04 19:31:51,842 (trainer:732) INFO: 12epoch:train:1901-2000batch: iter_time=1.213e-04, forward_time=0.147, loss_ctc=74.672, loss_att=64.816, acc=0.661, loss=67.773, backward_time=1.243, grad_norm=86.333, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.182, optim0_lr0=1.107e-04, train_time=3.146 +[gpub015:0/64] 2023-07-04 19:34:29,137 (trainer:732) INFO: 12epoch:train:2001-2100batch: iter_time=1.190e-04, forward_time=0.147, loss_ctc=84.626, loss_att=67.119, acc=0.658, loss=72.371, backward_time=1.243, grad_norm=95.382, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.182, optim0_lr0=1.107e-04, train_time=3.146 +[gpub015:0/64] 2023-07-04 19:37:06,442 (trainer:732) INFO: 12epoch:train:2101-2200batch: iter_time=1.147e-04, forward_time=0.147, loss_ctc=70.258, loss_att=56.040, acc=0.670, loss=60.306, backward_time=1.243, grad_norm=107.485, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.182, optim0_lr0=1.106e-04, train_time=3.146 +[gpub015:0/64] 2023-07-04 19:39:43,817 (trainer:732) INFO: 12epoch:train:2201-2300batch: iter_time=1.094e-04, forward_time=0.147, loss_ctc=74.006, loss_att=60.037, acc=0.680, loss=64.228, backward_time=1.243, grad_norm=107.315, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.182, optim0_lr0=1.106e-04, train_time=3.147 +[gpub015:0/64] 2023-07-04 19:42:21,229 (trainer:732) INFO: 
12epoch:train:2301-2400batch: iter_time=1.081e-04, forward_time=0.147, loss_ctc=86.224, loss_att=67.922, acc=0.683, loss=73.412, backward_time=1.243, grad_norm=104.049, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.182, optim0_lr0=1.105e-04, train_time=3.148 +[gpub015:0/64] 2023-07-04 19:44:58,641 (trainer:732) INFO: 12epoch:train:2401-2500batch: iter_time=1.103e-04, forward_time=0.148, loss_ctc=75.849, loss_att=55.993, acc=0.677, loss=61.950, backward_time=1.242, grad_norm=96.455, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.182, optim0_lr0=1.105e-04, train_time=3.148 +[gpub015:0/64] 2023-07-04 19:45:01,701 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub015:0/64] 2023-07-04 19:45:19,346 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-04 19:45:22,688 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-04 19:45:22,688 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub015:0/64] 2023-07-04 19:45:22,694 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-04 19:51:26,255 (trainer:732) INFO: 12epoch:train:2501-2600batch: iter_time=1.225, forward_time=0.175, loss_ctc=81.079, loss_att=68.853, acc=0.671, loss=72.521, backward_time=1.255, grad_norm=89.970, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.182, optim0_lr0=1.104e-04, train_time=7.751 +[gpub015:0/64] 2023-07-04 19:54:04,278 (trainer:732) INFO: 12epoch:train:2601-2700batch: iter_time=1.185e-04, forward_time=0.144, loss_ctc=71.385, loss_att=52.315, acc=0.699, loss=58.036, backward_time=1.244, grad_norm=77.476, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.103e-04, train_time=3.161 +[gpub015:0/64] 2023-07-04 19:56:41,442 (trainer:732) INFO: 12epoch:train:2701-2800batch: iter_time=1.104e-04, forward_time=0.144, loss_ctc=67.155, loss_att=55.880, acc=0.678, loss=59.262, backward_time=1.242, grad_norm=77.514, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.103e-04, train_time=3.143 +[gpub015:0/64] 2023-07-04 19:59:18,844 (trainer:732) INFO: 12epoch:train:2801-2900batch: iter_time=1.069e-04, forward_time=0.145, loss_ctc=83.629, loss_att=65.947, acc=0.656, loss=71.251, backward_time=1.242, grad_norm=88.064, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.102e-04, train_time=3.148 +[gpub015:0/64] 2023-07-04 20:01:55,894 (trainer:732) INFO: 12epoch:train:2901-3000batch: iter_time=1.108e-04, forward_time=0.144, loss_ctc=74.662, loss_att=58.929, acc=0.668, loss=63.649, backward_time=1.242, grad_norm=86.125, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.102e-04, train_time=3.141 +[gpub015:0/64] 2023-07-04 20:04:33,377 (trainer:732) INFO: 12epoch:train:3001-3100batch: iter_time=1.100e-04, forward_time=0.145, loss_ctc=74.376, loss_att=60.452, acc=0.678, loss=64.629, backward_time=1.244, grad_norm=89.490, clip=100.000, loss_scale=2.199e+12, 
optim_step_time=0.181, optim0_lr0=1.101e-04, train_time=3.149 +[gpub015:0/64] 2023-07-04 20:07:10,509 (trainer:732) INFO: 12epoch:train:3101-3200batch: iter_time=1.079e-04, forward_time=0.144, loss_ctc=77.433, loss_att=60.732, acc=0.681, loss=65.743, backward_time=1.242, grad_norm=105.819, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.101e-04, train_time=3.142 +[gpub015:0/64] 2023-07-04 20:09:47,663 (trainer:732) INFO: 12epoch:train:3201-3300batch: iter_time=1.169e-04, forward_time=0.145, loss_ctc=77.566, loss_att=60.190, acc=0.678, loss=65.403, backward_time=1.242, grad_norm=127.184, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.100e-04, train_time=3.143 +[gpub015:0/64] 2023-07-04 20:10:48,102 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub015:0/64] 2023-07-04 20:11:06,164 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-04 20:11:09,520 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-04 20:11:09,520 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub015:0/64] 2023-07-04 20:11:09,526 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-04 20:16:56,469 (trainer:732) INFO: 12epoch:train:3301-3400batch: iter_time=1.721, forward_time=0.145, loss_ctc=81.802, loss_att=68.440, acc=0.662, loss=72.449, backward_time=1.251, grad_norm=120.608, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.100e-04, train_time=8.576 +[gpub015:0/64] 2023-07-04 20:19:33,743 (trainer:732) INFO: 12epoch:train:3401-3500batch: iter_time=1.214e-04, forward_time=0.144, loss_ctc=76.097, loss_att=57.732, acc=0.683, loss=63.241, backward_time=1.240, grad_norm=86.251, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.099e-04, train_time=3.145 +[gpub015:0/64] 2023-07-04 20:22:10,967 (trainer:732) INFO: 12epoch:train:3501-3600batch: iter_time=1.174e-04, forward_time=0.144, loss_ctc=62.531, loss_att=47.624, acc=0.692, loss=52.096, backward_time=1.240, grad_norm=76.543, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.182, optim0_lr0=1.099e-04, train_time=3.144 +[gpub015:0/64] 2023-07-04 20:24:48,258 (trainer:732) INFO: 12epoch:train:3601-3700batch: iter_time=1.147e-04, forward_time=0.145, loss_ctc=82.753, loss_att=73.085, acc=0.642, loss=75.986, backward_time=1.242, grad_norm=93.445, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.182, optim0_lr0=1.098e-04, train_time=3.146 +[gpub015:0/64] 2023-07-04 20:27:25,282 (trainer:732) INFO: 12epoch:train:3701-3800batch: iter_time=1.122e-04, forward_time=0.144, loss_ctc=79.961, loss_att=63.845, acc=0.651, loss=68.680, backward_time=1.241, grad_norm=94.879, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.181, optim0_lr0=1.098e-04, train_time=3.140 +[gpub015:0/64] 2023-07-04 20:30:02,221 (trainer:732) INFO: 12epoch:train:3801-3900batch: iter_time=1.218e-04, forward_time=0.145, 
loss_ctc=70.155, loss_att=59.692, acc=0.663, loss=62.831, backward_time=1.241, grad_norm=81.352, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.182, optim0_lr0=1.097e-04, train_time=3.139 +[gpub015:0/64] 2023-07-04 20:32:39,167 (trainer:732) INFO: 12epoch:train:3901-4000batch: iter_time=1.407e-04, forward_time=0.144, loss_ctc=80.943, loss_att=62.078, acc=0.682, loss=67.738, backward_time=1.242, grad_norm=92.320, clip=100.000, loss_scale=2.199e+12, optim_step_time=0.182, optim0_lr0=1.097e-04, train_time=3.139 +[gpub015:0/64] 2023-07-04 20:35:16,227 (trainer:732) INFO: 12epoch:train:4001-4100batch: iter_time=1.208e-04, forward_time=0.145, loss_ctc=76.768, loss_att=59.276, acc=0.678, loss=64.524, backward_time=1.241, grad_norm=102.125, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.096e-04, train_time=3.141 +[gpub015:0/64] 2023-07-04 20:37:02,841 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub015:0/64] 2023-07-04 20:37:20,845 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-04 20:37:24,221 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-04 20:37:24,221 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub015:0/64] 2023-07-04 20:37:24,227 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-04 20:41:28,761 (trainer:732) INFO: 12epoch:train:4101-4200batch: iter_time=1.213, forward_time=0.145, loss_ctc=82.650, loss_att=63.715, acc=0.660, loss=69.395, backward_time=1.255, grad_norm=108.665, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.096e-04, train_time=7.450 +[gpub015:0/64] 2023-07-04 20:44:06,299 (trainer:732) INFO: 12epoch:train:4201-4300batch: iter_time=1.138e-04, forward_time=0.145, loss_ctc=77.528, loss_att=61.049, acc=0.680, loss=65.993, backward_time=1.243, grad_norm=102.226, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.095e-04, train_time=3.151 +[gpub015:0/64] 2023-07-04 20:46:43,366 (trainer:732) INFO: 12epoch:train:4301-4400batch: iter_time=1.237e-04, forward_time=0.144, loss_ctc=63.974, loss_att=49.928, acc=0.703, loss=54.141, backward_time=1.241, grad_norm=85.946, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.094e-04, train_time=3.141 +[gpub015:0/64] 2023-07-04 20:49:20,761 (trainer:732) INFO: 12epoch:train:4401-4500batch: iter_time=1.296e-04, forward_time=0.145, loss_ctc=73.590, loss_att=63.027, acc=0.653, loss=66.196, backward_time=1.243, grad_norm=98.824, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.180, optim0_lr0=1.094e-04, train_time=3.148 +[gpub015:0/64] 2023-07-04 20:51:57,869 (trainer:732) INFO: 12epoch:train:4501-4600batch: iter_time=1.230e-04, forward_time=0.143, loss_ctc=86.089, loss_att=68.894, acc=0.647, loss=74.053, backward_time=1.241, grad_norm=98.658, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.180, optim0_lr0=1.093e-04, train_time=3.142 
+[gpub015:0/64] 2023-07-04 20:54:34,865 (trainer:732) INFO: 12epoch:train:4601-4700batch: iter_time=1.307e-04, forward_time=0.144, loss_ctc=69.746, loss_att=61.107, acc=0.658, loss=63.699, backward_time=1.241, grad_norm=101.548, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.180, optim0_lr0=1.093e-04, train_time=3.140 +[gpub015:0/64] 2023-07-04 20:57:11,892 (trainer:732) INFO: 12epoch:train:4701-4800batch: iter_time=1.379e-04, forward_time=0.144, loss_ctc=72.222, loss_att=59.132, acc=0.676, loss=63.059, backward_time=1.240, grad_norm=95.294, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.092e-04, train_time=3.140 +[gpub015:0/64] 2023-07-04 20:59:49,193 (trainer:732) INFO: 12epoch:train:4801-4900batch: iter_time=1.299e-04, forward_time=0.144, loss_ctc=84.221, loss_att=67.911, acc=0.672, loss=72.804, backward_time=1.242, grad_norm=97.856, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.180, optim0_lr0=1.092e-04, train_time=3.146 +[gpub015:0/64] 2023-07-04 21:02:26,288 (trainer:732) INFO: 12epoch:train:4901-5000batch: iter_time=1.076e-04, forward_time=0.145, loss_ctc=77.713, loss_att=56.351, acc=0.676, loss=62.760, backward_time=1.241, grad_norm=93.021, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.180, optim0_lr0=1.091e-04, train_time=3.142 +[gpub015:0/64] 2023-07-04 21:02:29,142 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub015:0/64] 2023-07-04 21:02:47,188 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-04 21:02:50,583 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-04 21:02:50,583 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub015:0/64] 2023-07-04 21:02:50,589 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-04 21:09:23,316 (trainer:732) INFO: 12epoch:train:5001-5100batch: iter_time=1.207, forward_time=0.146, loss_ctc=82.085, loss_att=70.491, acc=0.667, loss=73.969, backward_time=1.255, grad_norm=101.219, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.182, optim0_lr0=1.091e-04, train_time=8.340 +[gpub015:0/64] 2023-07-04 21:12:00,519 (trainer:732) INFO: 12epoch:train:5101-5200batch: iter_time=1.063e-04, forward_time=0.145, loss_ctc=70.631, loss_att=52.245, acc=0.702, loss=57.761, backward_time=1.241, grad_norm=82.535, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.182, optim0_lr0=1.090e-04, train_time=3.144 +[gpub015:0/64] 2023-07-04 21:14:37,398 (trainer:732) INFO: 12epoch:train:5201-5300batch: iter_time=1.102e-04, forward_time=0.144, loss_ctc=69.574, loss_att=59.873, acc=0.662, loss=62.783, backward_time=1.240, grad_norm=83.957, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.090e-04, train_time=3.137 +[gpub015:0/64] 2023-07-04 21:17:14,881 (trainer:732) INFO: 12epoch:train:5301-5400batch: iter_time=9.520e-05, forward_time=0.145, loss_ctc=80.138, loss_att=66.006, acc=0.645, loss=70.246, 
backward_time=1.242, grad_norm=92.850, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.089e-04, train_time=3.149 +[gpub015:0/64] 2023-07-04 21:19:51,908 (trainer:732) INFO: 12epoch:train:5401-5500batch: iter_time=9.896e-05, forward_time=0.144, loss_ctc=71.912, loss_att=58.300, acc=0.672, loss=62.384, backward_time=1.240, grad_norm=92.965, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.089e-04, train_time=3.140 +[gpub015:0/64] 2023-07-04 21:22:28,899 (trainer:732) INFO: 12epoch:train:5501-5600batch: iter_time=9.511e-05, forward_time=0.144, loss_ctc=74.006, loss_att=59.170, acc=0.672, loss=63.621, backward_time=1.241, grad_norm=83.684, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.088e-04, train_time=3.140 +[gpub015:0/64] 2023-07-04 21:25:05,777 (trainer:732) INFO: 12epoch:train:5601-5700batch: iter_time=9.316e-05, forward_time=0.143, loss_ctc=77.242, loss_att=61.429, acc=0.675, loss=66.173, backward_time=1.240, grad_norm=96.974, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.088e-04, train_time=3.137 +[gpub015:0/64] 2023-07-04 21:27:42,708 (trainer:732) INFO: 12epoch:train:5701-5800batch: iter_time=1.053e-04, forward_time=0.144, loss_ctc=77.205, loss_att=58.937, acc=0.675, loss=64.417, backward_time=1.240, grad_norm=105.464, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.087e-04, train_time=3.138 +[gpub015:0/64] 2023-07-04 21:28:38,964 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub015:0/64] 2023-07-04 21:28:56,893 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-04 21:29:00,277 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-04 21:29:00,278 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub015:0/64] 2023-07-04 21:29:00,284 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-04 21:33:26,178 (trainer:732) INFO: 12epoch:train:5801-5900batch: iter_time=1.191, forward_time=0.146, loss_ctc=82.820, loss_att=69.559, acc=0.671, loss=73.537, backward_time=1.262, grad_norm=95.044, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.087e-04, train_time=6.869 +[gpub015:0/64] 2023-07-04 21:36:03,771 (trainer:732) INFO: 12epoch:train:5901-6000batch: iter_time=1.103e-04, forward_time=0.146, loss_ctc=75.434, loss_att=55.881, acc=0.695, loss=61.747, backward_time=1.242, grad_norm=85.721, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.182, optim0_lr0=1.086e-04, train_time=3.152 +[gpub015:0/64] 2023-07-04 21:38:40,878 (trainer:732) INFO: 12epoch:train:6001-6100batch: iter_time=1.150e-04, forward_time=0.146, loss_ctc=61.211, loss_att=46.498, acc=0.698, loss=50.912, backward_time=1.241, grad_norm=79.707, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.086e-04, train_time=3.142 +[gpub015:0/64] 2023-07-04 21:41:18,354 (trainer:732) INFO: 
12epoch:train:6101-6200batch: iter_time=1.148e-04, forward_time=0.146, loss_ctc=81.359, loss_att=70.422, acc=0.664, loss=73.703, backward_time=1.243, grad_norm=100.635, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.085e-04, train_time=3.149 +[gpub015:0/64] 2023-07-04 21:43:55,529 (trainer:732) INFO: 12epoch:train:6201-6300batch: iter_time=1.147e-04, forward_time=0.146, loss_ctc=77.317, loss_att=64.201, acc=0.663, loss=68.136, backward_time=1.242, grad_norm=85.881, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.085e-04, train_time=3.143 +[gpub015:0/64] 2023-07-04 21:46:32,894 (trainer:732) INFO: 12epoch:train:6301-6400batch: iter_time=1.152e-04, forward_time=0.146, loss_ctc=69.240, loss_att=57.327, acc=0.672, loss=60.901, backward_time=1.241, grad_norm=90.918, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.084e-04, train_time=3.147 +[gpub015:0/64] 2023-07-04 21:49:10,067 (trainer:732) INFO: 12epoch:train:6401-6500batch: iter_time=1.151e-04, forward_time=0.146, loss_ctc=80.419, loss_att=61.954, acc=0.689, loss=67.493, backward_time=1.242, grad_norm=100.821, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.084e-04, train_time=3.143 +[gpub015:0/64] 2023-07-04 21:51:47,235 (trainer:732) INFO: 12epoch:train:6501-6600batch: iter_time=1.108e-04, forward_time=0.146, loss_ctc=76.050, loss_att=60.487, acc=0.687, loss=65.156, backward_time=1.242, grad_norm=88.698, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.083e-04, train_time=3.143 +[gpub015:0/64] 2023-07-04 21:53:35,633 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub015:0/64] 2023-07-04 21:53:54,004 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-04 21:53:57,417 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-04 21:53:57,417 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub015:0/64] 2023-07-04 21:53:57,423 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-04 21:59:20,328 (trainer:732) INFO: 12epoch:train:6601-6700batch: iter_time=1.193, forward_time=0.147, loss_ctc=82.390, loss_att=62.650, acc=0.665, loss=68.572, backward_time=1.253, grad_norm=98.318, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.083e-04, train_time=9.062 +[gpub015:0/64] 2023-07-04 22:01:58,471 (trainer:732) INFO: 12epoch:train:6701-6800batch: iter_time=1.162e-04, forward_time=0.144, loss_ctc=76.609, loss_att=61.085, acc=0.679, loss=65.742, backward_time=1.244, grad_norm=106.235, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.082e-04, train_time=3.163 +[gpub015:0/64] 2023-07-04 22:04:35,355 (trainer:732) INFO: 12epoch:train:6801-6900batch: iter_time=1.242e-04, forward_time=0.143, loss_ctc=64.435, loss_att=50.254, acc=0.706, loss=54.508, backward_time=1.240, grad_norm=81.368, clip=100.000, loss_scale=4.398e+12, 
optim_step_time=0.182, optim0_lr0=1.082e-04, train_time=3.137 +[gpub015:0/64] 2023-07-04 22:07:12,751 (trainer:732) INFO: 12epoch:train:6901-7000batch: iter_time=1.163e-04, forward_time=0.144, loss_ctc=70.848, loss_att=61.231, acc=0.665, loss=64.116, backward_time=1.241, grad_norm=86.145, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.081e-04, train_time=3.148 +[gpub015:0/64] 2023-07-04 22:09:49,941 (trainer:732) INFO: 12epoch:train:7001-7100batch: iter_time=1.173e-04, forward_time=0.145, loss_ctc=83.009, loss_att=68.382, acc=0.652, loss=72.770, backward_time=1.243, grad_norm=95.705, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.182, optim0_lr0=1.081e-04, train_time=3.144 +[gpub015:0/64] 2023-07-04 22:12:26,941 (trainer:732) INFO: 12epoch:train:7101-7200batch: iter_time=9.946e-05, forward_time=0.144, loss_ctc=70.979, loss_att=56.996, acc=0.668, loss=61.190, backward_time=1.242, grad_norm=78.799, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.080e-04, train_time=3.140 +[gpub015:0/64] 2023-07-04 22:15:04,098 (trainer:732) INFO: 12epoch:train:7201-7300batch: iter_time=1.017e-04, forward_time=0.144, loss_ctc=70.377, loss_att=56.229, acc=0.680, loss=60.473, backward_time=1.242, grad_norm=84.014, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.080e-04, train_time=3.143 +[gpub015:0/64] 2023-07-04 22:17:41,598 (trainer:732) INFO: 12epoch:train:7301-7400batch: iter_time=9.492e-05, forward_time=0.144, loss_ctc=81.351, loss_att=66.341, acc=0.673, loss=70.844, backward_time=1.243, grad_norm=93.304, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.079e-04, train_time=3.150 +[gpub015:0/64] 2023-07-04 22:20:18,910 (trainer:732) INFO: 12epoch:train:7401-7500batch: iter_time=1.063e-04, forward_time=0.144, loss_ctc=78.117, loss_att=55.626, acc=0.691, loss=62.374, backward_time=1.242, grad_norm=96.027, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.079e-04, train_time=3.146 +[gpub015:0/64] 2023-07-04 22:20:21,637 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub015:0/64] 2023-07-04 22:20:39,406 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-04 22:20:42,788 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-04 22:20:42,788 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub015:0/64] 2023-07-04 22:20:42,794 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-04 22:26:34,393 (trainer:732) INFO: 12epoch:train:7501-7600batch: iter_time=1.201, forward_time=0.146, loss_ctc=78.276, loss_att=65.547, acc=0.682, loss=69.366, backward_time=1.258, grad_norm=125.636, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.078e-04, train_time=7.509 +[gpub015:0/64] 2023-07-04 22:29:11,707 (trainer:732) INFO: 12epoch:train:7601-7700batch: iter_time=1.155e-04, forward_time=0.145, loss_ctc=69.956, loss_att=52.275, acc=0.704, loss=57.579, backward_time=1.241, grad_norm=86.535, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.078e-04, train_time=3.146 +[gpub015:0/64] 2023-07-04 22:31:48,758 (trainer:732) INFO: 12epoch:train:7701-7800batch: iter_time=1.089e-04, forward_time=0.145, loss_ctc=66.350, loss_att=56.272, acc=0.683, loss=59.295, backward_time=1.241, grad_norm=81.376, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.182, optim0_lr0=1.077e-04, train_time=3.141 +[gpub015:0/64] 2023-07-04 22:34:26,515 (trainer:732) INFO: 12epoch:train:7801-7900batch: iter_time=1.116e-04, forward_time=0.146, loss_ctc=81.444, loss_att=63.780, acc=0.666, loss=69.079, backward_time=1.243, grad_norm=82.021, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.182, optim0_lr0=1.077e-04, train_time=3.155 +[gpub015:0/64] 2023-07-04 22:37:03,817 (trainer:732) INFO: 12epoch:train:7901-8000batch: iter_time=1.259e-04, forward_time=0.146, loss_ctc=73.305, loss_att=58.553, acc=0.673, loss=62.979, backward_time=1.242, grad_norm=86.271, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.076e-04, train_time=3.146 +[gpub015:0/64] 2023-07-04 22:39:41,184 (trainer:732) INFO: 12epoch:train:8001-8100batch: iter_time=1.047e-04, forward_time=0.146, loss_ctc=72.842, loss_att=59.680, acc=0.680, loss=63.629, backward_time=1.243, grad_norm=83.633, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.076e-04, train_time=3.147 +[gpub015:0/64] 2023-07-04 22:42:18,296 (trainer:732) INFO: 12epoch:train:8101-8200batch: iter_time=1.052e-04, forward_time=0.145, loss_ctc=76.362, loss_att=60.168, acc=0.687, loss=65.026, backward_time=1.242, grad_norm=110.533, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.075e-04, train_time=3.142 +[gpub015:0/64] 2023-07-04 22:44:55,396 (trainer:732) INFO: 12epoch:train:8201-8300batch: iter_time=9.924e-05, forward_time=0.145, loss_ctc=76.992, loss_att=57.507, acc=0.689, loss=63.352, backward_time=1.242, grad_norm=87.809, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.075e-04, 
train_time=3.142 +[gpub015:0/64] 2023-07-04 22:45:50,214 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub015:0/64] 2023-07-04 22:46:08,597 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-04 22:46:12,246 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-04 22:46:12,247 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub015:0/64] 2023-07-04 22:46:12,253 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-04 22:51:21,478 (trainer:732) INFO: 12epoch:train:8301-8400batch: iter_time=1.205, forward_time=0.144, loss_ctc=80.365, loss_att=65.058, acc=0.674, loss=69.650, backward_time=1.253, grad_norm=106.325, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.074e-04, train_time=7.721 +[gpub015:0/64] 2023-07-04 22:53:59,133 (trainer:732) INFO: 12epoch:train:8401-8500batch: iter_time=1.152e-04, forward_time=0.145, loss_ctc=74.424, loss_att=55.889, acc=0.689, loss=61.450, backward_time=1.242, grad_norm=95.701, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.074e-04, train_time=3.153 +[gpub015:0/64] 2023-07-04 22:56:36,584 (trainer:732) INFO: 12epoch:train:8501-8600batch: iter_time=1.030e-04, forward_time=0.145, loss_ctc=61.774, loss_att=47.039, acc=0.701, loss=51.460, backward_time=1.242, grad_norm=81.038, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.073e-04, train_time=3.149 +[gpub015:0/64] 2023-07-04 22:59:13,839 (trainer:732) INFO: 12epoch:train:8601-8700batch: iter_time=1.063e-04, forward_time=0.145, loss_ctc=78.654, loss_att=69.938, acc=0.653, loss=72.552, backward_time=1.243, grad_norm=89.759, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.073e-04, train_time=3.145 +[gpub015:0/64] 2023-07-04 23:01:51,155 (trainer:732) INFO: 12epoch:train:8701-8800batch: iter_time=1.165e-04, forward_time=0.144, loss_ctc=77.945, loss_att=62.783, acc=0.660, loss=67.331, backward_time=1.241, grad_norm=83.799, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.072e-04, train_time=3.146 +[gpub015:0/64] 2023-07-04 23:04:28,142 (trainer:732) INFO: 12epoch:train:8801-8900batch: iter_time=1.221e-04, forward_time=0.144, loss_ctc=70.601, loss_att=58.052, acc=0.671, loss=61.817, backward_time=1.240, grad_norm=91.330, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.072e-04, train_time=3.140 +[gpub015:0/64] 2023-07-04 23:07:05,403 (trainer:732) INFO: 12epoch:train:8901-9000batch: iter_time=1.206e-04, forward_time=0.145, loss_ctc=80.327, loss_att=60.532, acc=0.690, loss=66.471, backward_time=1.242, grad_norm=96.076, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.071e-04, train_time=3.145 +[gpub015:0/64] 2023-07-04 23:09:42,434 (trainer:732) INFO: 12epoch:train:9001-9100batch: iter_time=1.162e-04, forward_time=0.144, loss_ctc=75.994, loss_att=58.252, acc=0.683, loss=63.575, 
backward_time=1.241, grad_norm=91.538, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.071e-04, train_time=3.140 +[gpub015:0/64] 2023-07-04 23:11:28,947 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub015:0/64] 2023-07-04 23:11:46,760 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-04 23:11:50,122 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-04 23:11:50,122 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub015:0/64] 2023-07-04 23:11:50,128 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-04 23:16:39,903 (trainer:732) INFO: 12epoch:train:9101-9200batch: iter_time=1.216, forward_time=0.145, loss_ctc=81.962, loss_att=63.013, acc=0.674, loss=68.697, backward_time=1.255, grad_norm=96.644, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.070e-04, train_time=8.349 +[gpub015:0/64] 2023-07-04 23:19:18,238 (trainer:732) INFO: 12epoch:train:9201-9300batch: iter_time=1.179e-04, forward_time=0.145, loss_ctc=76.818, loss_att=60.914, acc=0.689, loss=65.685, backward_time=1.244, grad_norm=97.394, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.070e-04, train_time=3.166 +[gpub015:0/64] 2023-07-04 23:21:55,891 (trainer:732) INFO: 12epoch:train:9301-9400batch: iter_time=1.133e-04, forward_time=0.145, loss_ctc=64.491, loss_att=48.491, acc=0.714, loss=53.291, backward_time=1.242, grad_norm=103.178, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.069e-04, train_time=3.153 +[gpub015:0/64] 2023-07-04 23:24:33,316 (trainer:732) INFO: 12epoch:train:9401-9500batch: iter_time=1.123e-04, forward_time=0.146, loss_ctc=71.079, loss_att=60.571, acc=0.676, loss=63.723, backward_time=1.242, grad_norm=96.268, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.069e-04, train_time=3.148 +[gpub015:0/64] 2023-07-04 23:27:10,748 (trainer:732) INFO: 12epoch:train:9501-9600batch: iter_time=1.076e-04, forward_time=0.146, loss_ctc=83.740, loss_att=65.360, acc=0.670, loss=70.874, backward_time=1.243, grad_norm=101.596, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.068e-04, train_time=3.148 +[gpub015:0/64] 2023-07-04 23:29:48,320 (trainer:732) INFO: 12epoch:train:9601-9700batch: iter_time=1.198e-04, forward_time=0.146, loss_ctc=68.537, loss_att=55.883, acc=0.678, loss=59.679, backward_time=1.243, grad_norm=91.222, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.068e-04, train_time=3.151 +[gpub015:0/64] 2023-07-04 23:32:25,785 (trainer:732) INFO: 12epoch:train:9701-9800batch: iter_time=1.094e-04, forward_time=0.146, loss_ctc=71.251, loss_att=56.795, acc=0.683, loss=61.132, backward_time=1.242, grad_norm=95.223, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.067e-04, train_time=3.149 +[gpub015:0/64] 2023-07-04 23:35:03,217 (trainer:732) INFO: 
12epoch:train:9801-9900batch: iter_time=1.105e-04, forward_time=0.145, loss_ctc=82.689, loss_att=66.938, acc=0.684, loss=71.663, backward_time=1.243, grad_norm=92.914, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.067e-04, train_time=3.148 +[gpub015:0/64] 2023-07-04 23:37:40,257 (trainer:732) INFO: 12epoch:train:9901-10000batch: iter_time=1.088e-04, forward_time=0.145, loss_ctc=76.321, loss_att=55.700, acc=0.689, loss=61.887, backward_time=1.240, grad_norm=106.101, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.066e-04, train_time=3.141 +[gpub015:0/64] 2023-07-04 23:50:19,270 (trainer:338) INFO: 12epoch results: [train] iter_time=0.165, forward_time=0.146, loss_ctc=76.028, loss_att=60.678, acc=0.674, loss=65.283, backward_time=1.244, grad_norm=95.160, clip=100.000, loss_scale=4.398e+12, optim_step_time=0.181, optim0_lr0=1.091e-04, train_time=3.698, time=5 hours, 8 minutes and 28.51 seconds, total_count=90000, gpu_max_cached_mem_GB=37.139, [valid] loss_ctc=56.126, cer_ctc=0.306, loss_att=45.911, acc=0.623, cer=0.448, wer=0.996, loss=48.975, time=6 minutes and 37.11 seconds, total_count=9614, gpu_max_cached_mem_GB=37.139, [att_plot] time=5 minutes and 46.68 seconds, total_count=0, gpu_max_cached_mem_GB=37.139 +[gpub015:0/64] 2023-07-04 23:50:34,276 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub015:0/64] 2023-07-04 23:50:34,316 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/7epoch.pth +[gpub015:0/64] 2023-07-04 23:50:34,317 (trainer:272) INFO: 13/100epoch started. Estimated time to finish: 2 weeks, 5 days and 13 hours +[gpub015:0/64] 2023-07-04 23:50:34,320 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub015:0/64] 2023-07-04 23:50:51,721 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-04 23:50:55,289 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-04 23:50:55,289 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub015:0/64] 2023-07-04 23:50:55,296 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-04 23:55:20,069 (trainer:732) INFO: 13epoch:train:1-100batch: iter_time=1.207, forward_time=0.145, loss_ctc=85.987, loss_att=70.558, acc=0.640, loss=75.187, backward_time=1.257, grad_norm=124.267, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.066e-04, train_time=5.715 +[gpub015:0/64] 2023-07-04 23:58:04,071 (trainer:732) INFO: 13epoch:train:101-200batch: iter_time=0.006, forward_time=0.184, loss_ctc=73.151, loss_att=51.368, acc=0.686, loss=57.903, backward_time=1.255, grad_norm=88.292, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.182, optim0_lr0=1.065e-04, train_time=3.280 +[gpub015:0/64] 2023-07-05 00:00:49,379 (trainer:732) INFO: 13epoch:train:201-300batch: iter_time=1.286e-04, forward_time=0.174, loss_ctc=83.595, loss_att=65.113, acc=0.656, loss=70.658, backward_time=1.252, grad_norm=113.517, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.183, optim0_lr0=1.065e-04, train_time=3.306 +[gpub015:0/64] 2023-07-05 00:03:30,808 (trainer:732) INFO: 13epoch:train:301-400batch: iter_time=1.342e-04, forward_time=0.152, loss_ctc=84.466, loss_att=68.319, acc=0.661, loss=73.163, backward_time=1.243, grad_norm=105.019, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.182, optim0_lr0=1.064e-04, train_time=3.228 +[gpub015:0/64] 2023-07-05 00:06:08,085 (trainer:732) INFO: 13epoch:train:401-500batch: iter_time=1.279e-04, forward_time=0.144, loss_ctc=88.619, loss_att=76.517, acc=0.651, loss=80.147, backward_time=1.243, grad_norm=109.872, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.180, optim0_lr0=1.064e-04, train_time=3.145 +[gpub015:0/64] 2023-07-05 00:08:48,273 (trainer:732) INFO: 13epoch:train:501-600batch: iter_time=1.310e-04, forward_time=0.144, loss_ctc=78.073, loss_att=59.874, acc=0.665, loss=65.334, backward_time=1.243, grad_norm=157.357, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.063e-04, train_time=3.204 +[gpub015:0/64] 2023-07-05 00:11:32,992 (trainer:732) INFO: 13epoch:train:601-700batch: iter_time=1.264e-04, forward_time=0.143, loss_ctc=77.084, loss_att=60.101, acc=0.662, loss=65.196, backward_time=1.245, grad_norm=98.957, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.180, optim0_lr0=1.063e-04, train_time=3.294 +[gpub015:0/64] 2023-07-05 00:14:10,177 (trainer:732) INFO: 13epoch:train:701-800batch: iter_time=1.302e-04, forward_time=0.144, loss_ctc=72.641, loss_att=55.254, acc=0.691, loss=60.470, backward_time=1.241, grad_norm=97.430, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.062e-04, train_time=3.143 
+[gpub015:0/64] 2023-07-05 00:15:19,090 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub015:0/64] 2023-07-05 00:15:36,248 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 00:15:39,840 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 00:15:39,840 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub015:0/64] 2023-07-05 00:15:39,846 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-05 00:21:58,716 (trainer:732) INFO: 13epoch:train:801-900batch: iter_time=1.834, forward_time=0.144, loss_ctc=75.899, loss_att=62.053, acc=0.682, loss=66.207, backward_time=1.249, grad_norm=96.235, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.062e-04, train_time=9.371 +[gpub015:0/64] 2023-07-05 00:24:36,279 (trainer:732) INFO: 13epoch:train:901-1000batch: iter_time=1.032e-04, forward_time=0.144, loss_ctc=78.048, loss_att=61.186, acc=0.667, loss=66.245, backward_time=1.242, grad_norm=122.905, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.061e-04, train_time=3.151 +[gpub015:0/64] 2023-07-05 00:27:13,670 (trainer:732) INFO: 13epoch:train:1001-1100batch: iter_time=1.110e-04, forward_time=0.144, loss_ctc=76.879, loss_att=56.861, acc=0.686, loss=62.866, backward_time=1.242, grad_norm=98.500, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.061e-04, train_time=3.148 +[gpub015:0/64] 2023-07-05 00:29:50,859 (trainer:732) INFO: 13epoch:train:1101-1200batch: iter_time=1.162e-04, forward_time=0.145, loss_ctc=85.818, loss_att=67.227, acc=0.673, loss=72.804, backward_time=1.243, grad_norm=118.579, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.060e-04, train_time=3.144 +[gpub015:0/64] 2023-07-05 00:32:28,234 (trainer:732) INFO: 13epoch:train:1201-1300batch: iter_time=1.191e-04, forward_time=0.146, loss_ctc=85.561, loss_att=71.717, acc=0.666, loss=75.871, backward_time=1.245, grad_norm=103.403, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.060e-04, train_time=3.147 +[gpub015:0/64] 2023-07-05 00:35:05,850 (trainer:732) INFO: 13epoch:train:1301-1400batch: iter_time=1.150e-04, forward_time=0.146, loss_ctc=85.789, loss_att=70.197, acc=0.673, loss=74.875, backward_time=1.245, grad_norm=113.813, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.060e-04, train_time=3.152 +[gpub015:0/64] 2023-07-05 00:37:42,827 (trainer:732) INFO: 13epoch:train:1401-1500batch: iter_time=1.053e-04, forward_time=0.144, loss_ctc=76.540, loss_att=55.910, acc=0.667, loss=62.099, backward_time=1.240, grad_norm=91.779, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.059e-04, train_time=3.139 +[gpub015:0/64] 2023-07-05 00:40:20,163 (trainer:732) INFO: 13epoch:train:1501-1600batch: iter_time=1.059e-04, forward_time=0.145, loss_ctc=74.583, loss_att=60.149, acc=0.685, loss=64.479, 
backward_time=1.243, grad_norm=88.474, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.059e-04, train_time=3.147 +[gpub015:0/64] 2023-07-05 00:42:12,133 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub015:0/64] 2023-07-05 00:42:30,034 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 00:42:33,425 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 00:42:33,425 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub015:0/64] 2023-07-05 00:42:33,463 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-05 00:46:46,084 (trainer:732) INFO: 13epoch:train:1601-1700batch: iter_time=1.791, forward_time=0.145, loss_ctc=74.553, loss_att=58.445, acc=0.700, loss=63.277, backward_time=1.251, grad_norm=111.557, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.058e-04, train_time=7.718 +[gpub015:0/64] 2023-07-05 00:49:24,208 (trainer:732) INFO: 13epoch:train:1701-1800batch: iter_time=1.166e-04, forward_time=0.145, loss_ctc=75.927, loss_att=60.423, acc=0.666, loss=65.074, backward_time=1.244, grad_norm=134.424, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.058e-04, train_time=3.162 +[gpub015:0/64] 2023-07-05 00:52:01,356 (trainer:732) INFO: 13epoch:train:1801-1900batch: iter_time=1.104e-04, forward_time=0.144, loss_ctc=77.363, loss_att=57.785, acc=0.684, loss=63.658, backward_time=1.242, grad_norm=94.176, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.057e-04, train_time=3.143 +[gpub015:0/64] 2023-07-05 00:54:38,891 (trainer:732) INFO: 13epoch:train:1901-2000batch: iter_time=1.082e-04, forward_time=0.145, loss_ctc=78.779, loss_att=62.722, acc=0.687, loss=67.539, backward_time=1.243, grad_norm=96.279, clip=100.000, loss_scale=8.796e+12, optim_step_time=0.181, optim0_lr0=1.057e-04, train_time=3.150 +[gpub015:0/64] 2023-07-05 00:57:18,535 (trainer:732) INFO: 13epoch:train:2001-2100batch: iter_time=1.124e-04, forward_time=0.145, loss_ctc=83.142, loss_att=62.800, acc=0.676, loss=68.903, backward_time=1.244, grad_norm=93.127, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.056e-04, train_time=3.193 +[gpub015:0/64] 2023-07-05 01:00:10,027 (trainer:732) INFO: 13epoch:train:2101-2200batch: iter_time=1.114e-04, forward_time=0.145, loss_ctc=87.896, loss_att=72.933, acc=0.670, loss=77.422, backward_time=1.296, grad_norm=109.444, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.056e-04, train_time=3.430 +[gpub015:0/64] 2023-07-05 01:02:47,356 (trainer:732) INFO: 13epoch:train:2201-2300batch: iter_time=1.144e-04, forward_time=0.144, loss_ctc=76.474, loss_att=59.030, acc=0.674, loss=64.263, backward_time=1.243, grad_norm=92.859, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.055e-04, train_time=3.146 +[gpub015:0/64] 2023-07-05 01:05:27,710 (trainer:732) INFO: 
13epoch:train:2301-2400batch: iter_time=1.066e-04, forward_time=0.144, loss_ctc=76.529, loss_att=59.750, acc=0.682, loss=64.784, backward_time=1.244, grad_norm=89.961, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.055e-04, train_time=3.207 +[gpub015:0/64] 2023-07-05 01:08:04,714 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub015:0/64] 2023-07-05 01:08:22,388 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 01:08:25,735 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 01:08:25,735 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub015:0/64] 2023-07-05 01:08:25,742 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-05 01:13:37,098 (trainer:732) INFO: 13epoch:train:2401-2500batch: iter_time=1.192, forward_time=0.144, loss_ctc=70.399, loss_att=53.578, acc=0.702, loss=58.624, backward_time=1.244, grad_norm=79.956, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.054e-04, train_time=9.788 +[gpub015:0/64] 2023-07-05 01:16:16,640 (trainer:732) INFO: 13epoch:train:2501-2600batch: iter_time=1.207e-04, forward_time=0.145, loss_ctc=77.710, loss_att=67.795, acc=0.656, loss=70.770, backward_time=1.249, grad_norm=156.311, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.182, optim0_lr0=1.054e-04, train_time=3.191 +[gpub015:0/64] 2023-07-05 01:18:53,489 (trainer:732) INFO: 13epoch:train:2601-2700batch: iter_time=1.184e-04, forward_time=0.144, loss_ctc=71.797, loss_att=51.023, acc=0.687, loss=57.255, backward_time=1.240, grad_norm=156.086, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.053e-04, train_time=3.137 +[gpub015:0/64] 2023-07-05 01:21:30,516 (trainer:732) INFO: 13epoch:train:2701-2800batch: iter_time=1.030e-04, forward_time=0.144, loss_ctc=78.373, loss_att=62.065, acc=0.669, loss=66.957, backward_time=1.241, grad_norm=95.962, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.053e-04, train_time=3.140 +[gpub015:0/64] 2023-07-05 01:24:07,692 (trainer:732) INFO: 13epoch:train:2801-2900batch: iter_time=9.315e-05, forward_time=0.143, loss_ctc=83.675, loss_att=66.663, acc=0.670, loss=71.766, backward_time=1.242, grad_norm=96.929, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.052e-04, train_time=3.143 +[gpub015:0/64] 2023-07-05 01:26:45,090 (trainer:732) INFO: 13epoch:train:2901-3000batch: iter_time=1.043e-04, forward_time=0.144, loss_ctc=86.928, loss_att=75.665, acc=0.659, loss=79.044, backward_time=1.243, grad_norm=119.641, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.052e-04, train_time=3.148 +[gpub015:0/64] 2023-07-05 01:29:22,331 (trainer:732) INFO: 13epoch:train:3001-3100batch: iter_time=9.929e-05, forward_time=0.144, loss_ctc=74.752, loss_att=55.415, acc=0.672, loss=61.216, backward_time=1.242, grad_norm=108.366, clip=100.000, loss_scale=1.759e+13, 
optim_step_time=0.181, optim0_lr0=1.052e-04, train_time=3.145 +[gpub015:0/64] 2023-07-05 01:31:59,171 (trainer:732) INFO: 13epoch:train:3101-3200batch: iter_time=9.663e-05, forward_time=0.143, loss_ctc=75.329, loss_att=58.040, acc=0.669, loss=63.226, backward_time=1.239, grad_norm=93.650, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.051e-04, train_time=3.137 +[gpub015:0/64] 2023-07-05 01:34:36,282 (trainer:732) INFO: 13epoch:train:3201-3300batch: iter_time=1.020e-04, forward_time=0.143, loss_ctc=71.885, loss_att=54.805, acc=0.700, loss=59.929, backward_time=1.241, grad_norm=90.742, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.051e-04, train_time=3.142 +[gpub015:0/64] 2023-07-05 01:35:27,815 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub015:0/64] 2023-07-05 01:35:46,394 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 01:35:49,849 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 01:35:49,849 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub015:0/64] 2023-07-05 01:35:49,855 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-05 01:42:13,884 (trainer:732) INFO: 13epoch:train:3301-3400batch: iter_time=1.232, forward_time=0.144, loss_ctc=75.439, loss_att=63.828, acc=0.678, loss=67.311, backward_time=1.257, grad_norm=94.817, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.050e-04, train_time=9.152 +[gpub015:0/64] 2023-07-05 01:44:51,700 (trainer:732) INFO: 13epoch:train:3401-3500batch: iter_time=1.174e-04, forward_time=0.145, loss_ctc=74.146, loss_att=54.359, acc=0.677, loss=60.295, backward_time=1.243, grad_norm=122.599, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.050e-04, train_time=3.156 +[gpub015:0/64] 2023-07-05 01:47:28,958 (trainer:732) INFO: 13epoch:train:3501-3600batch: iter_time=1.050e-04, forward_time=0.145, loss_ctc=73.295, loss_att=55.148, acc=0.682, loss=60.592, backward_time=1.243, grad_norm=88.590, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.049e-04, train_time=3.145 +[gpub015:0/64] 2023-07-05 01:50:06,157 (trainer:732) INFO: 13epoch:train:3601-3700batch: iter_time=1.063e-04, forward_time=0.145, loss_ctc=83.897, loss_att=65.933, acc=0.670, loss=71.322, backward_time=1.242, grad_norm=103.744, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.049e-04, train_time=3.144 +[gpub015:0/64] 2023-07-05 01:52:43,823 (trainer:732) INFO: 13epoch:train:3701-3800batch: iter_time=1.084e-04, forward_time=0.144, loss_ctc=83.779, loss_att=69.293, acc=0.660, loss=73.639, backward_time=1.243, grad_norm=108.364, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.048e-04, train_time=3.153 +[gpub015:0/64] 2023-07-05 01:55:26,107 (trainer:732) INFO: 13epoch:train:3801-3900batch: iter_time=1.207e-04, forward_time=0.144, 
loss_ctc=82.365, loss_att=67.557, acc=0.668, loss=71.999, backward_time=1.247, grad_norm=101.347, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.048e-04, train_time=3.245 +[gpub015:0/64] 2023-07-05 01:58:04,045 (trainer:732) INFO: 13epoch:train:3901-4000batch: iter_time=1.078e-04, forward_time=0.144, loss_ctc=74.649, loss_att=55.654, acc=0.670, loss=61.352, backward_time=1.243, grad_norm=98.467, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.182, optim0_lr0=1.047e-04, train_time=3.159 +[gpub015:0/64] 2023-07-05 02:00:41,402 (trainer:732) INFO: 13epoch:train:4001-4100batch: iter_time=1.069e-04, forward_time=0.146, loss_ctc=73.796, loss_att=60.178, acc=0.683, loss=64.263, backward_time=1.243, grad_norm=95.938, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.047e-04, train_time=3.147 +[gpub015:0/64] 2023-07-05 02:02:24,654 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub015:0/64] 2023-07-05 02:02:42,647 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 02:02:46,089 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 02:02:46,089 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub015:0/64] 2023-07-05 02:02:46,095 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-05 02:07:52,811 (trainer:732) INFO: 13epoch:train:4101-4200batch: iter_time=1.202, forward_time=0.144, loss_ctc=74.415, loss_att=55.435, acc=0.692, loss=61.129, backward_time=1.254, grad_norm=107.073, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.046e-04, train_time=8.628 +[gpub015:0/64] 2023-07-05 02:10:30,964 (trainer:732) INFO: 13epoch:train:4201-4300batch: iter_time=1.046e-04, forward_time=0.144, loss_ctc=72.424, loss_att=58.787, acc=0.670, loss=62.878, backward_time=1.242, grad_norm=122.387, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.046e-04, train_time=3.163 +[gpub015:0/64] 2023-07-05 02:13:08,002 (trainer:732) INFO: 13epoch:train:4301-4400batch: iter_time=1.043e-04, forward_time=0.144, loss_ctc=75.996, loss_att=56.412, acc=0.673, loss=62.287, backward_time=1.240, grad_norm=92.333, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.046e-04, train_time=3.141 +[gpub015:0/64] 2023-07-05 02:15:45,069 (trainer:732) INFO: 13epoch:train:4401-4500batch: iter_time=1.084e-04, forward_time=0.145, loss_ctc=77.586, loss_att=60.651, acc=0.678, loss=65.732, backward_time=1.241, grad_norm=94.112, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.045e-04, train_time=3.141 +[gpub015:0/64] 2023-07-05 02:18:22,044 (trainer:732) INFO: 13epoch:train:4501-4600batch: iter_time=1.049e-04, forward_time=0.144, loss_ctc=82.242, loss_att=64.662, acc=0.670, loss=69.936, backward_time=1.240, grad_norm=115.961, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.045e-04, train_time=3.139 
+[gpub015:0/64] 2023-07-05 02:20:59,514 (trainer:732) INFO: 13epoch:train:4601-4700batch: iter_time=1.072e-04, forward_time=0.145, loss_ctc=92.408, loss_att=76.645, acc=0.664, loss=81.374, backward_time=1.244, grad_norm=106.089, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.044e-04, train_time=3.149 +[gpub015:0/64] 2023-07-05 02:23:36,368 (trainer:732) INFO: 13epoch:train:4701-4800batch: iter_time=1.045e-04, forward_time=0.144, loss_ctc=69.859, loss_att=51.347, acc=0.673, loss=56.901, backward_time=1.239, grad_norm=89.948, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.044e-04, train_time=3.137 +[gpub015:0/64] 2023-07-05 02:26:14,585 (trainer:732) INFO: 13epoch:train:4801-4900batch: iter_time=1.006e-04, forward_time=0.145, loss_ctc=77.996, loss_att=63.746, acc=0.677, loss=68.021, backward_time=1.244, grad_norm=88.365, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.043e-04, train_time=3.164 +[gpub015:0/64] 2023-07-05 02:28:53,321 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub015:0/64] 2023-07-05 02:29:11,386 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 02:29:15,170 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 02:29:15,170 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub015:0/64] 2023-07-05 02:29:15,176 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-05 02:32:58,371 (trainer:732) INFO: 13epoch:train:4901-5000batch: iter_time=1.213, forward_time=0.144, loss_ctc=65.774, loss_att=47.844, acc=0.702, loss=53.223, backward_time=1.251, grad_norm=75.759, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.182, optim0_lr0=1.043e-04, train_time=8.075 +[gpub015:0/64] 2023-07-05 02:35:38,624 (trainer:732) INFO: 13epoch:train:5001-5100batch: iter_time=9.429e-05, forward_time=0.147, loss_ctc=76.403, loss_att=64.562, acc=0.673, loss=68.115, backward_time=1.250, grad_norm=106.879, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.182, optim0_lr0=1.042e-04, train_time=3.205 +[gpub015:0/64] 2023-07-05 02:38:15,647 (trainer:732) INFO: 13epoch:train:5101-5200batch: iter_time=9.749e-05, forward_time=0.145, loss_ctc=71.952, loss_att=51.831, acc=0.688, loss=57.867, backward_time=1.241, grad_norm=89.609, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.042e-04, train_time=3.140 +[gpub015:0/64] 2023-07-05 02:40:52,928 (trainer:732) INFO: 13epoch:train:5201-5300batch: iter_time=9.622e-05, forward_time=0.145, loss_ctc=76.889, loss_att=62.062, acc=0.679, loss=66.510, backward_time=1.243, grad_norm=98.013, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.182, optim0_lr0=1.041e-04, train_time=3.145 +[gpub015:0/64] 2023-07-05 02:43:30,305 (trainer:732) INFO: 13epoch:train:5301-5400batch: iter_time=9.506e-05, forward_time=0.146, loss_ctc=83.260, loss_att=66.659, acc=0.681, loss=71.639, 
backward_time=1.244, grad_norm=97.815, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.041e-04, train_time=3.147 +[gpub015:0/64] 2023-07-05 02:46:07,967 (trainer:732) INFO: 13epoch:train:5401-5500batch: iter_time=9.793e-05, forward_time=0.145, loss_ctc=87.108, loss_att=75.168, acc=0.671, loss=78.750, backward_time=1.246, grad_norm=104.971, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.041e-04, train_time=3.153 +[gpub015:0/64] 2023-07-05 02:48:45,133 (trainer:732) INFO: 13epoch:train:5501-5600batch: iter_time=1.216e-04, forward_time=0.145, loss_ctc=73.208, loss_att=55.122, acc=0.679, loss=60.548, backward_time=1.241, grad_norm=98.135, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.040e-04, train_time=3.143 +[gpub015:0/64] 2023-07-05 02:51:22,309 (trainer:732) INFO: 13epoch:train:5601-5700batch: iter_time=1.200e-04, forward_time=0.145, loss_ctc=73.220, loss_att=56.231, acc=0.682, loss=61.328, backward_time=1.242, grad_norm=88.409, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.040e-04, train_time=3.143 +[gpub015:0/64] 2023-07-05 02:53:59,660 (trainer:732) INFO: 13epoch:train:5701-5800batch: iter_time=1.222e-04, forward_time=0.145, loss_ctc=71.839, loss_att=55.799, acc=0.701, loss=60.611, backward_time=1.242, grad_norm=86.829, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.039e-04, train_time=3.147 +[gpub015:0/64] 2023-07-05 02:54:51,451 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub015:0/64] 2023-07-05 02:55:09,616 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 02:55:12,984 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 02:55:12,984 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub015:0/64] 2023-07-05 02:55:12,991 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-05 03:00:53,451 (trainer:732) INFO: 13epoch:train:5801-5900batch: iter_time=1.204, forward_time=0.145, loss_ctc=74.023, loss_att=61.691, acc=0.691, loss=65.390, backward_time=1.254, grad_norm=93.924, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.039e-04, train_time=8.276 +[gpub015:0/64] 2023-07-05 03:03:31,383 (trainer:732) INFO: 13epoch:train:5901-6000batch: iter_time=1.073e-04, forward_time=0.145, loss_ctc=73.747, loss_att=54.734, acc=0.677, loss=60.438, backward_time=1.243, grad_norm=92.066, clip=100.000, loss_scale=1.759e+13, optim_step_time=0.181, optim0_lr0=1.038e-04, train_time=3.158 +[gpub015:0/64] 2023-07-05 03:06:08,513 (trainer:732) INFO: 13epoch:train:6001-6100batch: iter_time=1.025e-04, forward_time=0.143, loss_ctc=73.596, loss_att=56.085, acc=0.692, loss=61.338, backward_time=1.242, grad_norm=87.934, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.181, optim0_lr0=1.038e-04, train_time=3.142 +[gpub015:0/64] 2023-07-05 03:08:45,546 (trainer:732) INFO: 
13epoch:train:6101-6200batch: iter_time=9.164e-05, forward_time=0.143, loss_ctc=82.621, loss_att=65.377, acc=0.684, loss=70.550, backward_time=1.242, grad_norm=93.644, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.181, optim0_lr0=1.037e-04, train_time=3.140 +[gpub015:0/64] 2023-07-05 03:11:22,948 (trainer:732) INFO: 13epoch:train:6201-6300batch: iter_time=9.824e-05, forward_time=0.144, loss_ctc=83.357, loss_att=69.309, acc=0.670, loss=73.524, backward_time=1.244, grad_norm=95.890, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.181, optim0_lr0=1.037e-04, train_time=3.148 +[gpub015:0/64] 2023-07-05 03:14:00,619 (trainer:732) INFO: 13epoch:train:6301-6400batch: iter_time=9.997e-05, forward_time=0.145, loss_ctc=81.690, loss_att=66.899, acc=0.678, loss=71.336, backward_time=1.245, grad_norm=112.957, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.181, optim0_lr0=1.036e-04, train_time=3.153 +[gpub015:0/64] 2023-07-05 03:16:37,757 (trainer:732) INFO: 13epoch:train:6401-6500batch: iter_time=1.112e-04, forward_time=0.145, loss_ctc=73.800, loss_att=53.459, acc=0.684, loss=59.561, backward_time=1.242, grad_norm=92.890, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.181, optim0_lr0=1.036e-04, train_time=3.143 +[gpub015:0/64] 2023-07-05 03:19:14,828 (trainer:732) INFO: 13epoch:train:6501-6600batch: iter_time=1.123e-04, forward_time=0.144, loss_ctc=74.459, loss_att=61.204, acc=0.684, loss=65.181, backward_time=1.241, grad_norm=87.396, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.181, optim0_lr0=1.036e-04, train_time=3.141 +[gpub015:0/64] 2023-07-05 03:20:57,945 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub015:0/64] 2023-07-05 03:21:15,656 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 03:21:19,045 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 03:21:19,045 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub015:0/64] 2023-07-05 03:21:19,052 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-05 03:25:54,443 (trainer:732) INFO: 13epoch:train:6601-6700batch: iter_time=1.218, forward_time=0.144, loss_ctc=73.140, loss_att=52.638, acc=0.707, loss=58.789, backward_time=1.251, grad_norm=94.399, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.181, optim0_lr0=1.035e-04, train_time=7.992 +[gpub015:0/64] 2023-07-05 03:28:44,608 (trainer:732) INFO: 13epoch:train:6701-6800batch: iter_time=1.096e-04, forward_time=0.144, loss_ctc=73.678, loss_att=58.389, acc=0.673, loss=62.975, backward_time=1.260, grad_norm=107.744, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.181, optim0_lr0=1.035e-04, train_time=3.403 +[gpub015:0/64] 2023-07-05 03:31:30,183 (trainer:732) INFO: 13epoch:train:6801-6900batch: iter_time=1.042e-04, forward_time=0.144, loss_ctc=73.706, loss_att=55.405, acc=0.686, loss=60.895, backward_time=1.247, grad_norm=96.906, clip=100.000, loss_scale=3.518e+13, 
optim_step_time=0.181, optim0_lr0=1.034e-04, train_time=3.311 +[gpub015:0/64] 2023-07-05 03:34:08,360 (trainer:732) INFO: 13epoch:train:6901-7000batch: iter_time=1.082e-04, forward_time=0.144, loss_ctc=78.905, loss_att=61.541, acc=0.686, loss=66.750, backward_time=1.242, grad_norm=102.781, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.181, optim0_lr0=1.034e-04, train_time=3.163 +[gpub015:0/64] 2023-07-05 03:36:45,724 (trainer:732) INFO: 13epoch:train:7001-7100batch: iter_time=1.027e-04, forward_time=0.144, loss_ctc=82.124, loss_att=65.694, acc=0.673, loss=70.623, backward_time=1.242, grad_norm=99.630, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.181, optim0_lr0=1.033e-04, train_time=3.147 +[gpub015:0/64] 2023-07-05 03:39:23,628 (trainer:732) INFO: 13epoch:train:7101-7200batch: iter_time=1.048e-04, forward_time=0.147, loss_ctc=88.303, loss_att=75.234, acc=0.673, loss=79.155, backward_time=1.246, grad_norm=106.563, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.181, optim0_lr0=1.033e-04, train_time=3.158 +[gpub015:0/64] 2023-07-05 03:42:01,838 (trainer:732) INFO: 13epoch:train:7201-7300batch: iter_time=1.028e-04, forward_time=0.144, loss_ctc=68.219, loss_att=50.582, acc=0.684, loss=55.873, backward_time=1.243, grad_norm=81.465, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.181, optim0_lr0=1.033e-04, train_time=3.164 +[gpub015:0/64] 2023-07-05 03:44:39,356 (trainer:732) INFO: 13epoch:train:7301-7400batch: iter_time=1.055e-04, forward_time=0.145, loss_ctc=77.835, loss_att=62.872, acc=0.687, loss=67.360, backward_time=1.244, grad_norm=87.557, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.182, optim0_lr0=1.032e-04, train_time=3.150 +[gpub015:0/64] 2023-07-05 03:47:15,904 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub015:0/64] 2023-07-05 03:47:33,872 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 03:47:37,276 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 03:47:37,276 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub015:0/64] 2023-07-05 03:47:37,282 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-05 03:50:05,764 (trainer:732) INFO: 13epoch:train:7401-7500batch: iter_time=1.192, forward_time=0.144, loss_ctc=65.662, loss_att=47.694, acc=0.711, loss=53.085, backward_time=1.246, grad_norm=79.520, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.181, optim0_lr0=1.032e-04, train_time=6.528 +[gpub015:0/64] 2023-07-05 03:52:45,276 (trainer:732) INFO: 13epoch:train:7501-7600batch: iter_time=9.605e-05, forward_time=0.145, loss_ctc=70.791, loss_att=58.658, acc=0.672, loss=62.298, backward_time=1.247, grad_norm=107.372, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.181, optim0_lr0=1.031e-04, train_time=3.190 +[gpub015:0/64] 2023-07-05 03:55:23,506 (trainer:732) INFO: 13epoch:train:7601-7700batch: iter_time=1.037e-04, forward_time=0.145, loss_ctc=72.470, loss_att=52.026, acc=0.694, loss=58.159, backward_time=1.243, grad_norm=94.745, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.181, optim0_lr0=1.031e-04, train_time=3.164 +[gpub015:0/64] 2023-07-05 03:58:01,078 (trainer:732) INFO: 13epoch:train:7701-7800batch: iter_time=9.728e-05, forward_time=0.144, loss_ctc=80.052, loss_att=60.305, acc=0.687, loss=66.229, backward_time=1.241, grad_norm=113.321, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.181, optim0_lr0=1.030e-04, train_time=3.151 +[gpub015:0/64] 2023-07-05 04:00:38,495 (trainer:732) INFO: 13epoch:train:7801-7900batch: iter_time=9.455e-05, forward_time=0.144, loss_ctc=81.656, loss_att=66.033, acc=0.680, loss=70.720, backward_time=1.242, grad_norm=93.460, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.181, optim0_lr0=1.030e-04, train_time=3.148 +[gpub015:0/64] 2023-07-05 04:03:16,017 (trainer:732) INFO: 13epoch:train:7901-8000batch: iter_time=1.008e-04, forward_time=0.143, loss_ctc=85.972, loss_att=74.825, acc=0.670, loss=78.169, backward_time=1.244, grad_norm=112.494, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.181, optim0_lr0=1.029e-04, train_time=3.150 +[gpub015:0/64] 2023-07-05 04:05:53,016 (trainer:732) INFO: 13epoch:train:8001-8100batch: iter_time=1.015e-04, forward_time=0.143, loss_ctc=74.498, loss_att=55.317, acc=0.680, loss=61.071, backward_time=1.240, grad_norm=91.464, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.181, optim0_lr0=1.029e-04, train_time=3.140 +[gpub015:0/64] 2023-07-05 04:08:30,443 (trainer:732) INFO: 13epoch:train:8101-8200batch: iter_time=9.791e-05, forward_time=0.144, loss_ctc=69.863, loss_att=54.241, acc=0.685, loss=58.927, backward_time=1.242, grad_norm=82.075, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.181, 
optim0_lr0=1.029e-04, train_time=3.148 +[gpub015:0/64] 2023-07-05 04:11:07,674 (trainer:732) INFO: 13epoch:train:8201-8300batch: iter_time=1.092e-04, forward_time=0.144, loss_ctc=68.595, loss_att=52.123, acc=0.706, loss=57.064, backward_time=1.242, grad_norm=79.546, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.181, optim0_lr0=1.028e-04, train_time=3.144 +[gpub015:0/64] 2023-07-05 04:12:00,871 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub015:0/64] 2023-07-05 04:12:18,984 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 04:12:22,440 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 04:12:22,440 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub015:0/64] 2023-07-05 04:12:22,446 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-05 04:18:32,238 (trainer:732) INFO: 13epoch:train:8301-8400batch: iter_time=1.223, forward_time=0.146, loss_ctc=72.469, loss_att=59.337, acc=0.686, loss=63.277, backward_time=1.252, grad_norm=96.645, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.182, optim0_lr0=1.028e-04, train_time=8.891 +[gpub015:0/64] 2023-07-05 04:21:10,438 (trainer:732) INFO: 13epoch:train:8401-8500batch: iter_time=1.150e-04, forward_time=0.145, loss_ctc=73.753, loss_att=57.673, acc=0.674, loss=62.497, backward_time=1.243, grad_norm=122.511, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.182, optim0_lr0=1.027e-04, train_time=3.164 +[gpub015:0/64] 2023-07-05 04:23:48,063 (trainer:732) INFO: 13epoch:train:8501-8600batch: iter_time=1.133e-04, forward_time=0.146, loss_ctc=73.060, loss_att=55.876, acc=0.686, loss=61.032, backward_time=1.241, grad_norm=95.020, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.182, optim0_lr0=1.027e-04, train_time=3.152 +[gpub015:0/64] 2023-07-05 04:26:25,397 (trainer:732) INFO: 13epoch:train:8601-8700batch: iter_time=1.242e-04, forward_time=0.145, loss_ctc=82.342, loss_att=65.676, acc=0.670, loss=70.675, backward_time=1.242, grad_norm=98.947, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.182, optim0_lr0=1.026e-04, train_time=3.146 +[gpub015:0/64] 2023-07-05 04:29:04,670 (trainer:732) INFO: 13epoch:train:8701-8800batch: iter_time=1.143e-04, forward_time=0.146, loss_ctc=82.940, loss_att=68.734, acc=0.670, loss=72.996, backward_time=1.243, grad_norm=94.812, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.182, optim0_lr0=1.026e-04, train_time=3.185 +[gpub015:0/64] 2023-07-05 04:31:43,040 (trainer:732) INFO: 13epoch:train:8801-8900batch: iter_time=1.124e-04, forward_time=0.147, loss_ctc=80.299, loss_att=65.836, acc=0.675, loss=70.175, backward_time=1.243, grad_norm=110.899, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.182, optim0_lr0=1.026e-04, train_time=3.167 +[gpub015:0/64] 2023-07-05 04:34:19,985 (trainer:732) INFO: 13epoch:train:8901-9000batch: iter_time=1.233e-04, forward_time=0.145, loss_ctc=74.553, loss_att=55.213, 
acc=0.672, loss=61.015, backward_time=1.240, grad_norm=114.627, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.182, optim0_lr0=1.025e-04, train_time=3.139 +[gpub015:0/64] 2023-07-05 04:36:57,034 (trainer:732) INFO: 13epoch:train:9001-9100batch: iter_time=1.199e-04, forward_time=0.145, loss_ctc=72.019, loss_att=59.355, acc=0.687, loss=63.154, backward_time=1.241, grad_norm=90.972, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.182, optim0_lr0=1.025e-04, train_time=3.141 +[gpub015:0/64] 2023-07-05 04:38:44,132 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub015:0/64] 2023-07-05 04:39:02,444 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 04:39:06,144 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 04:39:06,144 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub015:0/64] 2023-07-05 04:39:06,150 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-05 04:43:21,746 (trainer:732) INFO: 13epoch:train:9101-9200batch: iter_time=1.211, forward_time=0.146, loss_ctc=72.956, loss_att=53.606, acc=0.696, loss=59.411, backward_time=1.254, grad_norm=87.995, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.182, optim0_lr0=1.024e-04, train_time=7.694 +[gpub015:0/64] 2023-07-05 04:46:02,195 (trainer:732) INFO: 13epoch:train:9201-9300batch: iter_time=1.155e-04, forward_time=0.144, loss_ctc=71.310, loss_att=57.178, acc=0.677, loss=61.418, backward_time=1.247, grad_norm=95.519, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.182, optim0_lr0=1.024e-04, train_time=3.209 +[gpub015:0/64] 2023-07-05 04:48:40,368 (trainer:732) INFO: 13epoch:train:9301-9400batch: iter_time=1.006e-04, forward_time=0.145, loss_ctc=73.757, loss_att=55.866, acc=0.683, loss=61.234, backward_time=1.243, grad_norm=93.753, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.182, optim0_lr0=1.023e-04, train_time=3.163 +[gpub015:0/64] 2023-07-05 04:51:18,276 (trainer:732) INFO: 13epoch:train:9401-9500batch: iter_time=1.172e-04, forward_time=0.144, loss_ctc=76.011, loss_att=59.441, acc=0.685, loss=64.412, backward_time=1.242, grad_norm=89.515, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.182, optim0_lr0=1.023e-04, train_time=3.158 +[gpub015:0/64] 2023-07-05 04:53:55,349 (trainer:732) INFO: 13epoch:train:9501-9600batch: iter_time=1.167e-04, forward_time=0.144, loss_ctc=79.471, loss_att=63.716, acc=0.670, loss=68.443, backward_time=1.241, grad_norm=101.257, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.182, optim0_lr0=1.023e-04, train_time=3.141 +[gpub015:0/64] 2023-07-05 04:56:33,102 (trainer:732) INFO: 13epoch:train:9601-9700batch: iter_time=9.937e-05, forward_time=0.145, loss_ctc=89.171, loss_att=76.309, acc=0.664, loss=80.167, backward_time=1.245, grad_norm=108.677, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.182, optim0_lr0=1.022e-04, train_time=3.155 +[gpub015:0/64] 2023-07-05 04:59:10,081 
(trainer:732) INFO: 13epoch:train:9701-9800batch: iter_time=1.165e-04, forward_time=0.144, loss_ctc=68.036, loss_att=50.680, acc=0.681, loss=55.886, backward_time=1.240, grad_norm=79.168, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.182, optim0_lr0=1.022e-04, train_time=3.139 +[gpub015:0/64] 2023-07-05 05:01:47,472 (trainer:732) INFO: 13epoch:train:9801-9900batch: iter_time=1.160e-04, forward_time=0.145, loss_ctc=75.666, loss_att=61.758, acc=0.685, loss=65.930, backward_time=1.245, grad_norm=98.942, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.182, optim0_lr0=1.021e-04, train_time=3.148 +[gpub015:0/64] 2023-07-05 05:04:24,479 (trainer:732) INFO: 13epoch:train:9901-10000batch: iter_time=1.035e-04, forward_time=0.145, loss_ctc=64.966, loss_att=46.260, acc=0.711, loss=51.872, backward_time=1.241, grad_norm=91.008, clip=100.000, loss_scale=3.518e+13, optim_step_time=0.182, optim0_lr0=1.021e-04, train_time=3.140 +[gpub015:0/64] 2023-07-05 05:17:35,091 (trainer:338) INFO: 13epoch results: [train] iter_time=0.157, forward_time=0.145, loss_ctc=77.137, loss_att=60.711, acc=0.679, loss=65.639, backward_time=1.245, grad_norm=100.824, clip=100.000, loss_scale=2.287e+13, optim_step_time=0.181, optim0_lr0=1.043e-04, train_time=3.766, time=5 hours, 14 minutes and 12.9 seconds, total_count=100000, gpu_max_cached_mem_GB=37.139, [valid] loss_ctc=55.311, cer_ctc=0.308, loss_att=44.884, acc=0.629, cer=0.442, wer=0.994, loss=48.012, time=7 minutes and 2.64 seconds, total_count=10626, gpu_max_cached_mem_GB=37.139, [att_plot] time=5 minutes and 45.23 seconds, total_count=0, gpu_max_cached_mem_GB=37.139 +[gpub015:0/64] 2023-07-05 05:17:50,410 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count +[gpub015:0/64] 2023-07-05 05:17:50,415 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/8epoch.pth +[gpub015:0/64] 2023-07-05 05:17:50,415 (trainer:272) INFO: 14/100epoch started. Estimated time to finish: 2 weeks, 5 days and 11 hours +[gpub015:0/64] 2023-07-05 05:17:50,419 (multiple_iter_factory:32) INFO: Building 0th iter-factory... 
+[gpub015:0/64] 2023-07-05 05:18:08,009 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 05:18:11,352 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.0", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.0", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.0", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.0", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 05:18:11,353 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.0, +[gpub015:0/64] 2023-07-05 05:18:11,359 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-05 05:22:38,320 (trainer:732) INFO: 14epoch:train:1-100batch: iter_time=1.236, forward_time=0.146, loss_ctc=67.492, loss_att=49.941, acc=0.685, loss=55.206, backward_time=1.261, grad_norm=90.648, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.182, optim0_lr0=1.020e-04, train_time=5.758 +[gpub015:0/64] 2023-07-05 05:25:16,237 (trainer:732) INFO: 14epoch:train:101-200batch: iter_time=1.042e-04, forward_time=0.145, loss_ctc=76.460, loss_att=60.179, acc=0.660, loss=65.063, backward_time=1.240, grad_norm=93.506, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.182, optim0_lr0=1.020e-04, train_time=3.158 +[gpub015:0/64] 2023-07-05 05:27:54,151 (trainer:732) INFO: 14epoch:train:201-300batch: iter_time=1.080e-04, forward_time=0.146, loss_ctc=71.779, loss_att=54.026, acc=0.681, loss=59.352, backward_time=1.241, grad_norm=86.178, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.182, optim0_lr0=1.020e-04, train_time=3.158 +[gpub015:0/64] 2023-07-05 05:30:31,984 (trainer:732) INFO: 14epoch:train:301-400batch: iter_time=1.215e-04, forward_time=0.146, loss_ctc=74.923, loss_att=54.424, acc=0.671, loss=60.574, backward_time=1.241, grad_norm=97.880, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.182, optim0_lr0=1.019e-04, train_time=3.156 +[gpub015:0/64] 2023-07-05 05:33:17,013 (trainer:732) INFO: 14epoch:train:401-500batch: iter_time=5.366e-04, forward_time=0.209, loss_ctc=74.569, loss_att=59.503, acc=0.676, loss=64.023, backward_time=1.247, grad_norm=94.127, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.184, optim0_lr0=1.019e-04, train_time=3.300 +[gpub015:0/64] 2023-07-05 05:36:02,320 (trainer:732) INFO: 14epoch:train:501-600batch: iter_time=1.064e-04, forward_time=0.200, loss_ctc=68.014, loss_att=54.675, acc=0.665, loss=58.677, backward_time=1.252, grad_norm=82.294, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.184, optim0_lr0=1.018e-04, train_time=3.306 +[gpub015:0/64] 2023-07-05 05:38:53,673 (trainer:732) INFO: 14epoch:train:601-700batch: iter_time=1.074e-04, forward_time=0.162, loss_ctc=79.869, loss_att=65.427, acc=0.663, loss=69.759, backward_time=1.260, grad_norm=100.551, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.183, optim0_lr0=1.018e-04, train_time=3.427 +[gpub015:0/64] 2023-07-05 05:41:42,236 (trainer:732) INFO: 14epoch:train:701-800batch: iter_time=1.092e-04, forward_time=0.146, loss_ctc=87.368, loss_att=57.776, acc=0.685, loss=66.653, backward_time=1.254, grad_norm=128.453, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.182, optim0_lr0=1.017e-04, train_time=3.371 
+[gpub015:0/64] 2023-07-05 05:42:44,919 (multiple_iter_factory:32) INFO: Building 1th iter-factory... +[gpub015:0/64] 2023-07-05 05:43:02,321 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 05:43:05,674 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.9", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.9", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.9", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.9", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 05:43:05,674 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.9, +[gpub015:0/64] 2023-07-05 05:43:05,680 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-05 05:47:29,182 (trainer:732) INFO: 14epoch:train:801-900batch: iter_time=1.271, forward_time=0.146, loss_ctc=80.548, loss_att=57.074, acc=0.681, loss=64.116, backward_time=1.258, grad_norm=96.622, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.182, optim0_lr0=1.017e-04, train_time=6.939 +[gpub015:0/64] 2023-07-05 05:50:06,931 (trainer:732) INFO: 14epoch:train:901-1000batch: iter_time=1.130e-04, forward_time=0.146, loss_ctc=78.396, loss_att=65.274, acc=0.669, loss=69.210, backward_time=1.243, grad_norm=98.587, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.182, optim0_lr0=1.017e-04, train_time=3.155 +[gpub015:0/64] 2023-07-05 05:52:44,281 (trainer:732) INFO: 14epoch:train:1001-1100batch: iter_time=1.191e-04, forward_time=0.146, loss_ctc=70.269, loss_att=52.890, acc=0.696, loss=58.104, backward_time=1.243, grad_norm=92.744, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.181, optim0_lr0=1.016e-04, train_time=3.147 +[gpub015:0/64] 2023-07-05 05:55:21,561 (trainer:732) INFO: 14epoch:train:1101-1200batch: iter_time=1.158e-04, forward_time=0.145, loss_ctc=71.987, loss_att=52.009, acc=0.680, loss=58.002, backward_time=1.242, grad_norm=90.603, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.182, optim0_lr0=1.016e-04, train_time=3.145 +[gpub015:0/64] 2023-07-05 05:57:58,721 (trainer:732) INFO: 14epoch:train:1201-1300batch: iter_time=1.267e-04, forward_time=0.145, loss_ctc=73.245, loss_att=58.533, acc=0.680, loss=62.946, backward_time=1.242, grad_norm=93.277, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.182, optim0_lr0=1.015e-04, train_time=3.143 +[gpub015:0/64] 2023-07-05 06:00:35,732 (trainer:732) INFO: 14epoch:train:1301-1400batch: iter_time=1.379e-04, forward_time=0.144, loss_ctc=69.153, loss_att=56.204, acc=0.675, loss=60.089, backward_time=1.242, grad_norm=87.236, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.182, optim0_lr0=1.015e-04, train_time=3.140 +[gpub015:0/64] 2023-07-05 06:03:13,091 (trainer:732) INFO: 14epoch:train:1401-1500batch: iter_time=1.207e-04, forward_time=0.145, loss_ctc=75.620, loss_att=62.912, acc=0.676, loss=66.725, backward_time=1.242, grad_norm=122.064, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.182, optim0_lr0=1.014e-04, train_time=3.147 +[gpub015:0/64] 2023-07-05 06:05:50,085 (trainer:732) INFO: 14epoch:train:1501-1600batch: iter_time=1.158e-04, forward_time=0.144, loss_ctc=86.422, loss_att=59.210, acc=0.686, loss=67.374, backward_time=1.240, 
grad_norm=121.982, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.181, optim0_lr0=1.014e-04, train_time=3.140 +[gpub015:0/64] 2023-07-05 06:07:45,814 (multiple_iter_factory:32) INFO: Building 2th iter-factory... +[gpub015:0/64] 2023-07-05 06:08:04,021 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 06:08:07,429 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.10", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.10", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.10", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.10", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 06:08:07,429 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.10, +[gpub015:0/64] 2023-07-05 06:08:07,435 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-05 06:11:42,622 (trainer:732) INFO: 14epoch:train:1601-1700batch: iter_time=1.516, forward_time=0.145, loss_ctc=91.881, loss_att=63.528, acc=0.682, loss=72.034, backward_time=1.250, grad_norm=122.952, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.181, optim0_lr0=1.014e-04, train_time=7.051 +[gpub015:0/64] 2023-07-05 06:14:20,460 (trainer:732) INFO: 14epoch:train:1701-1800batch: iter_time=1.177e-04, forward_time=0.145, loss_ctc=65.864, loss_att=52.353, acc=0.669, loss=56.407, backward_time=1.243, grad_norm=85.260, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.181, optim0_lr0=1.013e-04, train_time=3.157 +[gpub015:0/64] 2023-07-05 06:16:57,381 (trainer:732) INFO: 14epoch:train:1801-1900batch: iter_time=1.220e-04, forward_time=0.143, loss_ctc=80.381, loss_att=62.098, acc=0.676, loss=67.583, backward_time=1.240, grad_norm=109.302, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.181, optim0_lr0=1.013e-04, train_time=3.138 +[gpub015:0/64] 2023-07-05 06:19:34,703 (trainer:732) INFO: 14epoch:train:1901-2000batch: iter_time=1.216e-04, forward_time=0.146, loss_ctc=65.835, loss_att=46.913, acc=0.695, loss=52.589, backward_time=1.240, grad_norm=77.097, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.181, optim0_lr0=1.012e-04, train_time=3.146 +[gpub015:0/64] 2023-07-05 06:22:11,686 (trainer:732) INFO: 14epoch:train:2001-2100batch: iter_time=1.216e-04, forward_time=0.144, loss_ctc=74.781, loss_att=55.605, acc=0.680, loss=61.358, backward_time=1.240, grad_norm=98.143, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.181, optim0_lr0=1.012e-04, train_time=3.139 +[gpub015:0/64] 2023-07-05 06:24:48,909 (trainer:732) INFO: 14epoch:train:2101-2200batch: iter_time=1.146e-04, forward_time=0.145, loss_ctc=70.364, loss_att=58.732, acc=0.665, loss=62.221, backward_time=1.243, grad_norm=96.995, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.181, optim0_lr0=1.012e-04, train_time=3.144 +[gpub015:0/64] 2023-07-05 06:27:26,091 (trainer:732) INFO: 14epoch:train:2201-2300batch: iter_time=1.191e-04, forward_time=0.145, loss_ctc=71.398, loss_att=61.066, acc=0.665, loss=64.165, backward_time=1.242, grad_norm=86.951, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.181, optim0_lr0=1.011e-04, train_time=3.143 +[gpub015:0/64] 2023-07-05 06:30:03,134 (trainer:732) INFO: 
14epoch:train:2301-2400batch: iter_time=1.059e-04, forward_time=0.145, loss_ctc=79.594, loss_att=55.367, acc=0.691, loss=62.635, backward_time=1.241, grad_norm=111.641, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.181, optim0_lr0=1.011e-04, train_time=3.141 +[gpub015:0/64] 2023-07-05 06:32:40,642 (trainer:732) INFO: 14epoch:train:2401-2500batch: iter_time=1.181e-04, forward_time=0.146, loss_ctc=91.170, loss_att=66.737, acc=0.666, loss=74.067, backward_time=1.244, grad_norm=122.083, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.181, optim0_lr0=1.010e-04, train_time=3.150 +[gpub015:0/64] 2023-07-05 06:32:43,499 (multiple_iter_factory:32) INFO: Building 3th iter-factory... +[gpub015:0/64] 2023-07-05 06:33:01,325 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 06:33:04,751 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.1", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.1", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.1", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.1", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 06:33:04,751 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.1, +[gpub015:0/64] 2023-07-05 06:33:04,758 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-05 06:38:39,312 (trainer:732) INFO: 14epoch:train:2501-2600batch: iter_time=1.217, forward_time=0.147, loss_ctc=66.054, loss_att=47.825, acc=0.704, loss=53.294, backward_time=1.250, grad_norm=90.048, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.182, optim0_lr0=1.010e-04, train_time=7.173 +[gpub015:0/64] 2023-07-05 06:41:16,644 (trainer:732) INFO: 14epoch:train:2601-2700batch: iter_time=1.133e-04, forward_time=0.145, loss_ctc=74.725, loss_att=59.961, acc=0.674, loss=64.390, backward_time=1.242, grad_norm=98.791, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.182, optim0_lr0=1.010e-04, train_time=3.146 +[gpub015:0/64] 2023-07-05 06:43:53,864 (trainer:732) INFO: 14epoch:train:2701-2800batch: iter_time=1.041e-04, forward_time=0.145, loss_ctc=71.636, loss_att=53.864, acc=0.695, loss=59.196, backward_time=1.242, grad_norm=88.306, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.181, optim0_lr0=1.009e-04, train_time=3.144 +[gpub015:0/64] 2023-07-05 06:46:30,941 (trainer:732) INFO: 14epoch:train:2801-2900batch: iter_time=1.197e-04, forward_time=0.146, loss_ctc=72.656, loss_att=52.692, acc=0.678, loss=58.681, backward_time=1.241, grad_norm=93.367, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.181, optim0_lr0=1.009e-04, train_time=3.141 +[gpub015:0/64] 2023-07-05 06:49:08,213 (trainer:732) INFO: 14epoch:train:2901-3000batch: iter_time=1.044e-04, forward_time=0.144, loss_ctc=72.828, loss_att=58.523, acc=0.687, loss=62.815, backward_time=1.242, grad_norm=92.957, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.181, optim0_lr0=1.008e-04, train_time=3.145 +[gpub015:0/64] 2023-07-05 06:51:45,294 (trainer:732) INFO: 14epoch:train:3001-3100batch: iter_time=1.069e-04, forward_time=0.143, loss_ctc=68.485, loss_att=54.235, acc=0.679, loss=58.510, backward_time=1.241, grad_norm=90.151, clip=100.000, loss_scale=7.037e+13, 
optim_step_time=0.181, optim0_lr0=1.008e-04, train_time=3.141 +[gpub015:0/64] 2023-07-05 06:54:28,653 (trainer:732) INFO: 14epoch:train:3101-3200batch: iter_time=9.495e-05, forward_time=0.144, loss_ctc=79.365, loss_att=65.692, acc=0.672, loss=69.794, backward_time=1.247, grad_norm=93.123, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.181, optim0_lr0=1.007e-04, train_time=3.267 +[gpub015:0/64] 2023-07-05 06:57:07,175 (trainer:732) INFO: 14epoch:train:3201-3300batch: iter_time=9.992e-05, forward_time=0.144, loss_ctc=85.733, loss_att=57.493, acc=0.694, loss=65.965, backward_time=1.243, grad_norm=134.602, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.181, optim0_lr0=1.007e-04, train_time=3.170 +[gpub015:0/64] 2023-07-05 06:58:02,021 (multiple_iter_factory:32) INFO: Building 4th iter-factory... +[gpub015:0/64] 2023-07-05 06:58:20,249 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 06:58:23,672 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.8", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.8", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.8", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.8", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 06:58:23,673 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.8, +[gpub015:0/64] 2023-07-05 06:58:23,679 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-05 07:02:56,111 (trainer:732) INFO: 14epoch:train:3301-3400batch: iter_time=1.206, forward_time=0.206, loss_ctc=75.066, loss_att=55.125, acc=0.684, loss=61.108, backward_time=1.254, grad_norm=97.874, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.187, optim0_lr0=1.007e-04, train_time=6.978 +[gpub015:0/64] 2023-07-05 07:05:33,511 (trainer:732) INFO: 14epoch:train:3401-3500batch: iter_time=1.175e-04, forward_time=0.147, loss_ctc=80.268, loss_att=64.486, acc=0.664, loss=69.220, backward_time=1.243, grad_norm=102.801, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.182, optim0_lr0=1.006e-04, train_time=3.148 +[gpub015:0/64] 2023-07-05 07:08:10,474 (trainer:732) INFO: 14epoch:train:3501-3600batch: iter_time=1.258e-04, forward_time=0.146, loss_ctc=70.086, loss_att=52.785, acc=0.689, loss=57.975, backward_time=1.240, grad_norm=91.850, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.182, optim0_lr0=1.006e-04, train_time=3.139 +[gpub015:0/64] 2023-07-05 07:10:47,426 (trainer:732) INFO: 14epoch:train:3601-3700batch: iter_time=1.143e-04, forward_time=0.146, loss_ctc=68.709, loss_att=48.855, acc=0.690, loss=54.811, backward_time=1.241, grad_norm=84.286, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.182, optim0_lr0=1.005e-04, train_time=3.139 +[gpub015:0/64] 2023-07-05 07:13:25,490 (trainer:732) INFO: 14epoch:train:3701-3800batch: iter_time=1.245e-04, forward_time=0.146, loss_ctc=71.880, loss_att=56.870, acc=0.684, loss=61.373, backward_time=1.243, grad_norm=101.212, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.182, optim0_lr0=1.005e-04, train_time=3.161 +[gpub015:0/64] 2023-07-05 07:16:02,505 (trainer:732) INFO: 14epoch:train:3801-3900batch: iter_time=1.332e-04, forward_time=0.146, 
loss_ctc=65.690, loss_att=53.660, acc=0.671, loss=57.269, backward_time=1.240, grad_norm=103.338, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.182, optim0_lr0=1.005e-04, train_time=3.140 +[gpub015:0/64] 2023-07-05 07:18:39,723 (trainer:732) INFO: 14epoch:train:3901-4000batch: iter_time=1.226e-04, forward_time=0.147, loss_ctc=75.033, loss_att=62.084, acc=0.672, loss=65.969, backward_time=1.243, grad_norm=101.234, clip=100.000, loss_scale=7.037e+13, optim_step_time=0.182, optim0_lr0=1.004e-04, train_time=3.144 +[gpub015:0/64] 2023-07-05 07:21:16,597 (trainer:732) INFO: 14epoch:train:4001-4100batch: iter_time=1.200e-04, forward_time=0.145, loss_ctc=83.330, loss_att=57.692, acc=0.689, loss=65.383, backward_time=1.241, grad_norm=106.096, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.182, optim0_lr0=1.004e-04, train_time=3.137 +[gpub015:0/64] 2023-07-05 07:23:05,096 (multiple_iter_factory:32) INFO: Building 5th iter-factory... +[gpub015:0/64] 2023-07-05 07:23:23,163 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 07:23:26,571 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.4", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.4", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.4", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.4", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 07:23:26,571 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.4, +[gpub015:0/64] 2023-07-05 07:23:26,577 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-05 07:27:52,257 (trainer:732) INFO: 14epoch:train:4101-4200batch: iter_time=1.222, forward_time=0.146, loss_ctc=87.297, loss_att=61.477, acc=0.683, loss=69.223, backward_time=1.252, grad_norm=113.635, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.182, optim0_lr0=1.003e-04, train_time=7.913 +[gpub015:0/64] 2023-07-05 07:30:30,045 (trainer:732) INFO: 14epoch:train:4201-4300batch: iter_time=9.853e-05, forward_time=0.145, loss_ctc=67.259, loss_att=52.733, acc=0.675, loss=57.091, backward_time=1.241, grad_norm=99.259, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=1.003e-04, train_time=3.156 +[gpub015:0/64] 2023-07-05 07:33:07,210 (trainer:732) INFO: 14epoch:train:4301-4400batch: iter_time=1.197e-04, forward_time=0.144, loss_ctc=75.113, loss_att=56.926, acc=0.690, loss=62.382, backward_time=1.242, grad_norm=106.822, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=1.003e-04, train_time=3.143 +[gpub015:0/64] 2023-07-05 07:35:43,948 (trainer:732) INFO: 14epoch:train:4401-4500batch: iter_time=1.184e-04, forward_time=0.143, loss_ctc=68.567, loss_att=50.621, acc=0.682, loss=56.005, backward_time=1.240, grad_norm=83.577, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=1.002e-04, train_time=3.135 +[gpub015:0/64] 2023-07-05 07:38:30,505 (trainer:732) INFO: 14epoch:train:4501-4600batch: iter_time=1.128e-04, forward_time=0.146, loss_ctc=71.674, loss_att=50.888, acc=0.691, loss=57.124, backward_time=1.253, grad_norm=85.775, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=1.002e-04, train_time=3.331 
+[gpub015:0/64] 2023-07-05 07:41:07,397 (trainer:732) INFO: 14epoch:train:4601-4700batch: iter_time=1.265e-04, forward_time=0.143, loss_ctc=73.227, loss_att=60.909, acc=0.677, loss=64.604, backward_time=1.241, grad_norm=95.135, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=1.001e-04, train_time=3.138 +[gpub015:0/64] 2023-07-05 07:43:46,708 (trainer:732) INFO: 14epoch:train:4701-4800batch: iter_time=1.181e-04, forward_time=0.145, loss_ctc=68.192, loss_att=55.093, acc=0.669, loss=59.023, backward_time=1.241, grad_norm=88.359, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=1.001e-04, train_time=3.186 +[gpub015:0/64] 2023-07-05 07:46:23,653 (trainer:732) INFO: 14epoch:train:4801-4900batch: iter_time=1.105e-04, forward_time=0.145, loss_ctc=79.220, loss_att=59.755, acc=0.682, loss=65.594, backward_time=1.241, grad_norm=120.294, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=1.001e-04, train_time=3.139 +[gpub015:0/64] 2023-07-05 07:49:07,600 (trainer:732) INFO: 14epoch:train:4901-5000batch: iter_time=1.119e-04, forward_time=0.145, loss_ctc=86.729, loss_att=63.418, acc=0.679, loss=70.412, backward_time=1.251, grad_norm=111.675, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=1.000e-04, train_time=3.279 +[gpub015:0/64] 2023-07-05 07:49:10,466 (multiple_iter_factory:32) INFO: Building 6th iter-factory... +[gpub015:0/64] 2023-07-05 07:49:28,643 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 07:49:32,109 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.7", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.7", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.7", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.7", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 07:49:32,109 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.7, +[gpub015:0/64] 2023-07-05 07:49:32,115 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-05 07:54:27,305 (trainer:732) INFO: 14epoch:train:5001-5100batch: iter_time=1.182, forward_time=0.145, loss_ctc=65.866, loss_att=47.473, acc=0.709, loss=52.991, backward_time=1.255, grad_norm=80.499, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=9.998e-05, train_time=6.394 +[gpub015:0/64] 2023-07-05 07:57:05,479 (trainer:732) INFO: 14epoch:train:5101-5200batch: iter_time=1.089e-04, forward_time=0.146, loss_ctc=72.265, loss_att=58.470, acc=0.677, loss=62.608, backward_time=1.242, grad_norm=95.899, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=9.994e-05, train_time=3.163 +[gpub015:0/64] 2023-07-05 07:59:42,843 (trainer:732) INFO: 14epoch:train:5201-5300batch: iter_time=1.169e-04, forward_time=0.146, loss_ctc=68.799, loss_att=51.505, acc=0.703, loss=56.693, backward_time=1.242, grad_norm=91.972, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=9.990e-05, train_time=3.147 +[gpub015:0/64] 2023-07-05 08:02:19,845 (trainer:732) INFO: 14epoch:train:5301-5400batch: iter_time=1.145e-04, forward_time=0.145, loss_ctc=71.880, loss_att=51.948, acc=0.684, loss=57.928, 
backward_time=1.240, grad_norm=84.364, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=9.986e-05, train_time=3.140 +[gpub015:0/64] 2023-07-05 08:04:57,349 (trainer:732) INFO: 14epoch:train:5401-5500batch: iter_time=1.105e-04, forward_time=0.146, loss_ctc=71.747, loss_att=58.949, acc=0.688, loss=62.788, backward_time=1.241, grad_norm=95.166, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=9.982e-05, train_time=3.150 +[gpub015:0/64] 2023-07-05 08:07:36,385 (trainer:732) INFO: 14epoch:train:5501-5600batch: iter_time=1.111e-04, forward_time=0.146, loss_ctc=66.692, loss_att=52.899, acc=0.683, loss=57.037, backward_time=1.241, grad_norm=86.102, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=9.978e-05, train_time=3.181 +[gpub015:0/64] 2023-07-05 08:10:15,268 (trainer:732) INFO: 14epoch:train:5601-5700batch: iter_time=1.079e-04, forward_time=0.146, loss_ctc=76.339, loss_att=65.761, acc=0.678, loss=68.934, backward_time=1.244, grad_norm=93.247, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=9.974e-05, train_time=3.177 +[gpub015:0/64] 2023-07-05 08:12:52,081 (trainer:732) INFO: 14epoch:train:5701-5800batch: iter_time=1.003e-04, forward_time=0.144, loss_ctc=82.139, loss_att=57.379, acc=0.692, loss=64.807, backward_time=1.240, grad_norm=128.174, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=9.970e-05, train_time=3.136 +[gpub015:0/64] 2023-07-05 08:13:46,941 (multiple_iter_factory:32) INFO: Building 7th iter-factory... +[gpub015:0/64] 2023-07-05 08:14:04,844 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 08:14:08,310 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.5", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.5", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.5", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.5", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 08:14:08,310 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.5, +[gpub015:0/64] 2023-07-05 08:14:08,316 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-05 08:19:19,699 (trainer:732) INFO: 14epoch:train:5801-5900batch: iter_time=1.217, forward_time=0.145, loss_ctc=71.224, loss_att=52.241, acc=0.699, loss=57.936, backward_time=1.247, grad_norm=98.314, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=9.966e-05, train_time=7.752 +[gpub015:0/64] 2023-07-05 08:21:59,636 (trainer:732) INFO: 14epoch:train:5901-6000batch: iter_time=1.174e-04, forward_time=0.145, loss_ctc=74.291, loss_att=61.484, acc=0.686, loss=65.326, backward_time=1.251, grad_norm=92.289, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=9.962e-05, train_time=3.199 +[gpub015:0/64] 2023-07-05 08:24:37,095 (trainer:732) INFO: 14epoch:train:6001-6100batch: iter_time=1.225e-04, forward_time=0.145, loss_ctc=73.843, loss_att=54.203, acc=0.692, loss=60.095, backward_time=1.241, grad_norm=98.891, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=9.958e-05, train_time=3.149 +[gpub015:0/64] 2023-07-05 08:27:16,819 (trainer:732) INFO: 
14epoch:train:6101-6200batch: iter_time=1.314e-04, forward_time=0.145, loss_ctc=63.824, loss_att=46.318, acc=0.701, loss=51.570, backward_time=1.242, grad_norm=82.169, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=9.954e-05, train_time=3.194 +[gpub015:0/64] 2023-07-05 08:29:58,576 (trainer:732) INFO: 14epoch:train:6201-6300batch: iter_time=1.331e-04, forward_time=0.146, loss_ctc=76.866, loss_att=57.439, acc=0.689, loss=63.267, backward_time=1.252, grad_norm=97.819, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.180, optim0_lr0=9.950e-05, train_time=3.235 +[gpub015:0/64] 2023-07-05 08:32:35,761 (trainer:732) INFO: 14epoch:train:6301-6400batch: iter_time=1.293e-04, forward_time=0.144, loss_ctc=66.634, loss_att=56.034, acc=0.687, loss=59.214, backward_time=1.241, grad_norm=102.060, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=9.946e-05, train_time=3.143 +[gpub015:0/64] 2023-07-05 08:35:12,784 (trainer:732) INFO: 14epoch:train:6401-6500batch: iter_time=1.259e-04, forward_time=0.145, loss_ctc=72.410, loss_att=59.662, acc=0.683, loss=63.486, backward_time=1.242, grad_norm=99.566, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=9.942e-05, train_time=3.140 +[gpub015:0/64] 2023-07-05 08:37:49,786 (trainer:732) INFO: 14epoch:train:6501-6600batch: iter_time=1.133e-04, forward_time=0.144, loss_ctc=83.069, loss_att=58.236, acc=0.699, loss=65.686, backward_time=1.242, grad_norm=114.626, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=9.938e-05, train_time=3.140 +[gpub015:0/64] 2023-07-05 08:39:39,785 (multiple_iter_factory:32) INFO: Building 8th iter-factory... +[gpub015:0/64] 2023-07-05 08:39:57,765 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 08:40:01,165 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.2", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.2", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.2", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.2", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 08:40:01,165 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.2, +[gpub015:0/64] 2023-07-05 08:40:01,171 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-05 08:45:39,161 (trainer:732) INFO: 14epoch:train:6601-6700batch: iter_time=1.302, forward_time=0.147, loss_ctc=81.601, loss_att=58.591, acc=0.689, loss=65.494, backward_time=1.254, grad_norm=111.016, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.182, optim0_lr0=9.935e-05, train_time=9.386 +[gpub015:0/64] 2023-07-05 08:48:20,753 (trainer:732) INFO: 14epoch:train:6701-6800batch: iter_time=1.204e-04, forward_time=0.152, loss_ctc=64.824, loss_att=53.322, acc=0.679, loss=56.773, backward_time=1.251, grad_norm=91.480, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=9.931e-05, train_time=3.233 +[gpub015:0/64] 2023-07-05 08:50:57,965 (trainer:732) INFO: 14epoch:train:6801-6900batch: iter_time=1.248e-04, forward_time=0.145, loss_ctc=74.189, loss_att=56.752, acc=0.696, loss=61.983, backward_time=1.242, grad_norm=88.689, clip=100.000, loss_scale=1.407e+14, 
optim_step_time=0.181, optim0_lr0=9.927e-05, train_time=3.144 +[gpub015:0/64] 2023-07-05 08:53:34,859 (trainer:732) INFO: 14epoch:train:6901-7000batch: iter_time=1.250e-04, forward_time=0.144, loss_ctc=67.349, loss_att=48.904, acc=0.691, loss=54.437, backward_time=1.240, grad_norm=96.258, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=9.923e-05, train_time=3.138 +[gpub015:0/64] 2023-07-05 08:56:11,811 (trainer:732) INFO: 14epoch:train:7001-7100batch: iter_time=1.255e-04, forward_time=0.145, loss_ctc=70.955, loss_att=50.384, acc=0.695, loss=56.555, backward_time=1.239, grad_norm=99.455, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=9.919e-05, train_time=3.139 +[gpub015:0/64] 2023-07-05 08:58:48,943 (trainer:732) INFO: 14epoch:train:7101-7200batch: iter_time=1.220e-04, forward_time=0.143, loss_ctc=73.206, loss_att=61.616, acc=0.676, loss=65.093, backward_time=1.241, grad_norm=86.657, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=9.915e-05, train_time=3.142 +[gpub015:0/64] 2023-07-05 09:01:26,106 (trainer:732) INFO: 14epoch:train:7201-7300batch: iter_time=1.328e-04, forward_time=0.146, loss_ctc=66.192, loss_att=55.575, acc=0.671, loss=58.760, backward_time=1.241, grad_norm=92.181, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=9.911e-05, train_time=3.143 +[gpub015:0/64] 2023-07-05 09:04:03,534 (trainer:732) INFO: 14epoch:train:7301-7400batch: iter_time=1.310e-04, forward_time=0.145, loss_ctc=79.538, loss_att=59.235, acc=0.689, loss=65.326, backward_time=1.242, grad_norm=100.524, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=9.907e-05, train_time=3.148 +[gpub015:0/64] 2023-07-05 09:06:42,404 (trainer:732) INFO: 14epoch:train:7401-7500batch: iter_time=1.288e-04, forward_time=0.144, loss_ctc=86.486, loss_att=62.275, acc=0.679, loss=69.538, backward_time=1.242, grad_norm=101.606, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.181, optim0_lr0=9.903e-05, train_time=3.177 +[gpub015:0/64] 2023-07-05 09:06:48,753 (multiple_iter_factory:32) INFO: Building 9th iter-factory... 
+[gpub015:0/64] 2023-07-05 09:07:06,613 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 09:07:10,023 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.11", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.11", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.11", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.11", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 09:07:10,023 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.11, +[gpub015:0/64] 2023-07-05 09:07:10,095 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-05 09:14:08,695 (trainer:732) INFO: 14epoch:train:7501-7600batch: iter_time=1.646, forward_time=0.177, loss_ctc=69.696, loss_att=50.684, acc=0.697, loss=56.387, backward_time=1.258, grad_norm=83.667, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.183, optim0_lr0=9.899e-05, train_time=8.925 +[gpub015:0/64] 2023-07-05 09:16:46,222 (trainer:732) INFO: 14epoch:train:7601-7700batch: iter_time=9.515e-05, forward_time=0.144, loss_ctc=69.322, loss_att=56.992, acc=0.685, loss=60.691, backward_time=1.242, grad_norm=99.714, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.180, optim0_lr0=9.896e-05, train_time=3.151 +[gpub015:0/64] 2023-07-05 09:19:23,334 (trainer:732) INFO: 14epoch:train:7701-7800batch: iter_time=1.117e-04, forward_time=0.144, loss_ctc=71.131, loss_att=52.075, acc=0.696, loss=57.792, backward_time=1.242, grad_norm=86.667, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.180, optim0_lr0=9.892e-05, train_time=3.142 +[gpub015:0/64] 2023-07-05 09:22:00,431 (trainer:732) INFO: 14epoch:train:7801-7900batch: iter_time=1.339e-04, forward_time=0.144, loss_ctc=73.686, loss_att=51.618, acc=0.692, loss=58.238, backward_time=1.241, grad_norm=96.390, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.180, optim0_lr0=9.888e-05, train_time=3.142 +[gpub015:0/64] 2023-07-05 09:24:37,992 (trainer:732) INFO: 14epoch:train:7901-8000batch: iter_time=1.229e-04, forward_time=0.145, loss_ctc=70.794, loss_att=61.014, acc=0.684, loss=63.948, backward_time=1.244, grad_norm=96.805, clip=100.000, loss_scale=1.407e+14, optim_step_time=0.180, optim0_lr0=9.884e-05, train_time=3.151 +[gpub015:0/64] 2023-07-05 09:27:15,174 (trainer:732) INFO: 14epoch:train:8001-8100batch: iter_time=1.156e-04, forward_time=0.144, loss_ctc=67.169, loss_att=55.923, acc=0.676, loss=59.297, backward_time=1.241, grad_norm=91.698, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.180, optim0_lr0=9.880e-05, train_time=3.143 +[gpub015:0/64] 2023-07-05 09:29:52,590 (trainer:732) INFO: 14epoch:train:8101-8200batch: iter_time=1.278e-04, forward_time=0.145, loss_ctc=78.525, loss_att=62.256, acc=0.685, loss=67.136, backward_time=1.243, grad_norm=99.114, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.180, optim0_lr0=9.876e-05, train_time=3.148 +[gpub015:0/64] 2023-07-05 09:32:30,203 (trainer:732) INFO: 14epoch:train:8201-8300batch: iter_time=1.219e-04, forward_time=0.145, loss_ctc=81.362, loss_att=55.922, acc=0.694, loss=63.554, backward_time=1.242, grad_norm=95.643, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.180, 
optim0_lr0=9.872e-05, train_time=3.152 +[gpub015:0/64] 2023-07-05 09:33:34,938 (multiple_iter_factory:32) INFO: Building 10th iter-factory... +[gpub015:0/64] 2023-07-05 09:33:53,041 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 09:33:56,452 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.6", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.6", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.6", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.6", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 09:33:56,452 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.6, +[gpub015:0/64] 2023-07-05 09:33:56,459 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +[gpub015:0/64] 2023-07-05 09:39:03,625 (trainer:732) INFO: 14epoch:train:8301-8400batch: iter_time=1.854, forward_time=0.165, loss_ctc=73.073, loss_att=53.289, acc=0.697, loss=59.224, backward_time=1.253, grad_norm=90.126, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.182, optim0_lr0=9.869e-05, train_time=7.868 +[gpub015:0/64] 2023-07-05 09:41:49,065 (trainer:732) INFO: 14epoch:train:8401-8500batch: iter_time=1.238e-04, forward_time=0.143, loss_ctc=74.007, loss_att=61.877, acc=0.673, loss=65.516, backward_time=1.249, grad_norm=104.208, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.180, optim0_lr0=9.865e-05, train_time=3.309 +[gpub015:0/64] 2023-07-05 09:44:26,311 (trainer:732) INFO: 14epoch:train:8501-8600batch: iter_time=1.224e-04, forward_time=0.144, loss_ctc=72.420, loss_att=54.139, acc=0.689, loss=59.623, backward_time=1.241, grad_norm=86.207, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.180, optim0_lr0=9.861e-05, train_time=3.145 +[gpub015:0/64] 2023-07-05 09:47:03,231 (trainer:732) INFO: 14epoch:train:8601-8700batch: iter_time=1.255e-04, forward_time=0.144, loss_ctc=63.298, loss_att=45.849, acc=0.702, loss=51.084, backward_time=1.241, grad_norm=78.605, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.180, optim0_lr0=9.857e-05, train_time=3.138 +[gpub015:0/64] 2023-07-05 09:49:40,176 (trainer:732) INFO: 14epoch:train:8701-8800batch: iter_time=1.186e-04, forward_time=0.144, loss_ctc=75.575, loss_att=55.981, acc=0.687, loss=61.859, backward_time=1.241, grad_norm=95.599, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.180, optim0_lr0=9.853e-05, train_time=3.139 +[gpub015:0/64] 2023-07-05 09:52:37,889 (trainer:732) INFO: 14epoch:train:8801-8900batch: iter_time=1.207e-04, forward_time=0.144, loss_ctc=65.438, loss_att=56.468, acc=0.674, loss=59.159, backward_time=1.391, grad_norm=78.677, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.180, optim0_lr0=9.849e-05, train_time=3.554 +[gpub015:0/64] 2023-07-05 09:55:59,096 (trainer:732) INFO: 14epoch:train:8901-9000batch: iter_time=1.066e-04, forward_time=0.144, loss_ctc=70.899, loss_att=58.105, acc=0.677, loss=61.943, backward_time=1.561, grad_norm=86.385, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.180, optim0_lr0=9.846e-05, train_time=4.024 +[gpub015:0/64] 2023-07-05 09:59:18,624 (trainer:732) INFO: 14epoch:train:9001-9100batch: iter_time=1.067e-04, forward_time=0.145, loss_ctc=81.516, loss_att=57.092, 
acc=0.700, loss=64.419, backward_time=1.548, grad_norm=122.948, clip=100.000, loss_scale=2.815e+14, optim_step_time=0.180, optim0_lr0=9.842e-05, train_time=3.990 +[gpub015:0/64] 2023-07-05 10:01:35,903 (multiple_iter_factory:32) INFO: Building 11th iter-factory... +[gpub015:0/64] 2023-07-05 10:01:53,829 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4') +[gpub015:0/64] 2023-07-05 10:01:57,524 (abs_task:1570) INFO: [train] dataset: +ESPnetDataset( + speech: {"path": "exp/s2t_stats_raw_bpe50000/splits12/wav.scp/split.3", "type": "kaldi_ark"} + text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.prev/split.3", "type": "text"} + text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text.ctc/split.3", "type": "text"} + text: {"path": "exp/s2t_stats_raw_bpe50000/splits12/text/split.3", "type": "text"} + preprocess: ) +[gpub015:0/64] 2023-07-05 10:01:57,525 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=37994, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits12/speech_shape/split.3, +[gpub015:0/64] 2023-07-05 10:01:57,531 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=37994, mean=128.0, min=128, max=129 +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/cuda/__init__.py:497: UserWarning: Can't initialize NVML + warnings.warn("Can't initialize NVML")
+/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/cuda/__init__.py:497: UserWarning: Can't initialize NVML + warnings.warn("Can't initialize NVML") +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/cuda/__init__.py:497: UserWarning: Can't initialize NVML + warnings.warn("Can't initialize NVML") +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/cuda/__init__.py:497: UserWarning: Can't initialize NVML + warnings.warn("Can't initialize NVML") +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/cuda/__init__.py:497: UserWarning: Can't initialize NVML + warnings.warn("Can't initialize NVML") +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/cuda/__init__.py:497: UserWarning: Can't initialize NVML + warnings.warn("Can't initialize NVML") +Process SpawnProcess-4: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1534, in all_reduce + work = default_pg.allreduce([tensor], opts) +RuntimeError: CUDA error: unknown error +CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect. +For debugging consider passing CUDA_LAUNCH_BLOCKING=1. +gpub078:4170392:4170392 [3] NCCL INFO comm 0x4f67f390 rank 47 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +[W ProcessGroupNCCL.cpp:948] [Rank 6] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 5] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 13] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 12] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 15] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 14] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 4] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 22] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 21] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 20] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 23] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +gpub037:1522725:1522810 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub037:1522724:1522808 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub032:3289604:3289696 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub032:3289605:3289693 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub032:3289606:3289626 [0] NCCL INFO comm 0x501cec20 rank 15 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub026:2433085:2433171 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub026:2433084:2433172 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub026:2433086:2433173 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub037:1522724:1522745 [0] NCCL INFO comm 0xab8ed350 rank 22 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub032:3289604:3289627 [0] NCCL INFO comm 0x50c34690 rank 13 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub032:3289605:3289624 [0] NCCL INFO comm 0xb6f8bc90 rank 14 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub026:2433085:2433106 [0] NCCL INFO comm 0xb7dab990 rank 5 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub037:1522725:1522747 [0] NCCL INFO comm 0x4f7df910 rank 23 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub026:2433084:2433108 [0] NCCL INFO comm 0x4fe36690 rank 4 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub026:2433086:2433109 [0] NCCL INFO comm 0xc27df910 rank 6 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub037:1522723:1522746 [0] NCCL INFO comm 0xba5d23a0 rank 21 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub037:1522722:1522811 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub037:1522722:1522744 [0] NCCL INFO comm 0x514cae40 rank 20 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +[W ProcessGroupNCCL.cpp:948] [Rank 39] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 38] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +gpub052:1901670:1901757 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +[W ProcessGroupNCCL.cpp:948] [Rank 36] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 37] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +gpub052:1901670:1901690 [0] NCCL INFO comm 0xb6ced700 rank 39 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub052:1901669:1901759 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub052:1901668:1901758 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub052:1901668:1901689 [0] NCCL INFO comm 0x50134230 rank 37 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub052:1901669:1901688 [0] NCCL INFO comm 0x50c05250 rank 38 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub052:1901667:1901760 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub032:3289603:3289625 [0] NCCL INFO comm 0x9f95b40 rank 12 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub052:1901667:1901691 [0] NCCL INFO comm 0xbc2124a0 rank 36 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub080:4113203:4113295 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub080:4113203:4113203 [0] NCCL INFO comm 0xa21d7f0 rank 52 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub015:879783:879861 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub015:879783:879783 [3] NCCL INFO comm 0x5071eb50 rank 3 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +Process SpawnProcess-3: +gpub079:2657933:2658017 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 14] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 13] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +gpub079:2657933:2657933 [1] NCCL INFO comm 0x8f776d0 rank 49 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +Process SpawnProcess-1: +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 36] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 6] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-4: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 15] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-4: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 23] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-1: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 12] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 5] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 51] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 49. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 50] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 49. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +gpub079:2657934:2658014 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub079:2657935:2658016 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +[W ProcessGroupNCCL.cpp:948] [Rank 48] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 49. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 28] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 49. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-1: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 4] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 29] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 49. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-2: +gpub079:2657935:2657956 [0] NCCL INFO comm 0x4edd83d0 rank 51 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 21] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 31] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 49. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-1: +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 20] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 22] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +gpub050:1879228:1879313 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub050:1879226:1879312 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +[W ProcessGroupNCCL.cpp:948] [Rank 30] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 49. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +gpub050:1879227:1879310 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +Process SpawnProcess-4: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: [Rank 47] Caught collective operation timeout: WorkNCCL(SeqNum=3865712, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800494 milliseconds before timing out. 
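The traceback above is the root of the cascade in this excerpt: rank 47's all-reduce timed out after the 30-minute NCCL watchdog limit, its aborted communicator was advertised through the NCCLABORTEDCOMM store key, and every other rank then failed at the same call site, trainer.py:516. That line all-reduces a scalar iterator_stop flag so that all 64 ranks leave the epoch together once any rank's iterator is exhausted. Below is a minimal sketch of that stop-flag pattern, not ESPnet's actual trainer code; the gloo backend, loopback address, and per-rank batch counts are placeholders chosen so the sketch runs on a CPU-only machine:

```python
# Minimal sketch (not ESPnet code) of the stop-flag synchronization performed
# at trainer.py:516: every rank all-reduces a scalar "iterator_stop" each step
# so that all ranks leave the epoch together once any rank runs out of batches.
import torch
import torch.distributed as dist
import torch.multiprocessing as mp


def worker(rank: int, world_size: int) -> None:
    dist.init_process_group(
        backend="gloo",  # gloo so the sketch runs without GPUs; the job used nccl
        init_method="tcp://127.0.0.1:29500",
        rank=rank,
        world_size=world_size,
    )
    local_batches = 3 if rank == 0 else 5  # rank 0 exhausts its iterator first
    iterator_stop = torch.zeros(1, dtype=torch.int64)
    for step in range(10):
        if step >= local_batches:
            iterator_stop.fill_(1)
        # Counterpart of: torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
        dist.all_reduce(iterator_stop, op=dist.ReduceOp.SUM)
        if iterator_stop.item() > 0:
            # If one rank never reaches this collective (e.g. after a CUDA
            # error, as for rank 47 above), the others block here until the
            # watchdog timeout fires.
            print(f"rank {rank}: stopping together at step {step}")
            break
    dist.destroy_process_group()


if __name__ == "__main__":
    mp.spawn(worker, args=(2,), nprocs=2)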
+gpub079:2657934:2657955 [0] NCCL INFO comm 0x505ec9b0 rank 50 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub050:1879226:1879246 [0] NCCL INFO comm 0x50792660 rank 29 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub050:1879228:1879247 [0] NCCL INFO comm 0x5177dae0 rank 31 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub050:1879227:1879245 [0] NCCL INFO comm 0x50baa200 rank 30 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 38] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 33] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 49. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 32] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 49. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 34] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 49. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 35] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 49. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-4: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 39] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 37] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 47. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-1: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: [Rank 52] Caught collective operation timeout: WorkNCCL(SeqNum=3865712, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800348 milliseconds before timing out. +gpub051:2913626:2913716 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub050:1879225:1879311 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub051:2913625:2913713 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub051:2913626:2913649 [0] NCCL INFO comm 0x9e42a10 rank 35 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub050:1879225:1879248 [0] NCCL INFO comm 0xa81f9440 rank 28 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub051:2913625:2913648 [0] NCCL INFO comm 0xb9b5ccd0 rank 34 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub079:2657932:2657957 [0] NCCL INFO comm 0x8c890be0 rank 48 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub015:879781:879860 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub015:879781:879781 [1] NCCL INFO comm 0x8d09c1b0 rank 1 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +Process SpawnProcess-4: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: [Rank 3] Caught collective operation timeout: WorkNCCL(SeqNum=3865712, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800141 milliseconds before timing out. 
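For reference, the Timeout(ms)=1800000 in these WorkNCCL messages is torch.distributed's default process-group timeout of 30 minutes, after which the NCCL watchdog aborts the hung collective. The timeout is configurable when the group is created; a hedged sketch follows (gloo and world_size=1 only so it runs without a cluster, and the 60-minute value is purely illustrative, not a recommendation drawn from this run):

```python
# Sketch only: Timeout(ms)=1800000 above is torch.distributed's default
# process-group timeout (30 minutes). It can be overridden at group creation.
from datetime import timedelta

import torch.distributed as dist


def main() -> None:
    dist.init_process_group(
        backend="gloo",  # placeholder backend; the failing job used nccl
        init_method="tcp://127.0.0.1:29501",
        rank=0,
        world_size=1,
        timeout=timedelta(minutes=60),  # illustrative value, not from this run
    )
    dist.destroy_process_group()


if __name__ == "__main__":
    main()
```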
+Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: [Rank 49] Caught collective operation timeout: WorkNCCL(SeqNum=3865712, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800195 milliseconds before timing out. +gpub051:2913623:2913647 [0] NCCL INFO comm 0x8dc3e980 rank 32 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +Process SpawnProcess-4: +Process SpawnProcess-4: +Traceback (most recent call last): +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 31] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 49. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 35] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 49. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 29] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 49. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +gpub051:2913624:2913650 [0] NCCL INFO comm 0xbb329750 rank 33 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +Process SpawnProcess-1: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 28] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 49. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) +gpub081:2742228:2742325 [1] NCCL INFO [Service thread] Connection closed by localRank 1 + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 34] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 49. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +gpub081:2742228:2742228 [1] NCCL INFO comm 0xb78a1250 rank 57 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +Process SpawnProcess-1: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 48] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 49. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: [Rank 1] Caught collective operation timeout: WorkNCCL(SeqNum=3865712, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800012 milliseconds before timing out. 
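The SpawnProcess-3/4 tracebacks that follow fail at a different call site than the others: inside DistributedDataParallel.forward(), whose _sync_buffers() path broadcasts module buffers from an authoritative rank before each forward pass, so it is the first collective those ranks hit after the communicator abort. A small sketch of that mechanism under the same hedges as above (single-process gloo group and the module are placeholders; only the broadcast_buffers keyword is the real DDP argument):

```python
# Sketch of the code path in the SpawnProcess-3/4 tracebacks below: with
# broadcast_buffers=True (the DDP default), forward() first broadcasts module
# buffers (here BatchNorm running stats) via _sync_buffers().
import torch
import torch.distributed as dist
import torch.nn as nn
from torch.nn.parallel import DistributedDataParallel as DDP


def main() -> None:
    dist.init_process_group(
        backend="gloo",
        init_method="tcp://127.0.0.1:29502",
        rank=0,
        world_size=1,
    )
    model = nn.BatchNorm1d(8)  # has running_mean / running_var buffers
    ddp_model = DDP(model, broadcast_buffers=True)  # default, shown explicitly
    out = ddp_model(torch.randn(4, 8))  # buffer broadcast happens in forward()
    print(out.shape)
    dist.destroy_process_group()


if __name__ == "__main__":
    main()
```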
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 567, in train_one_epoch
+    retval = model(**batch)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
+    return forward_call(*input, **kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1034, in forward
+    self._sync_buffers()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1621, in _sync_buffers
+    self._sync_module_buffers(authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1625, in _sync_module_buffers
+    self._default_broadcast_coalesced(authoritative_rank=authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1646, in _default_broadcast_coalesced
+    self._distributed_broadcast_coalesced(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1562, in _distributed_broadcast_coalesced
+    dist._broadcast_coalesced(
+RuntimeError: NCCL communicator was aborted on rank 30. Original reason for failure was: [Rank 30] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 49. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 567, in train_one_epoch
+    retval = model(**batch)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
+    return forward_call(*input, **kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1034, in forward
+    self._sync_buffers()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1621, in _sync_buffers
+    self._sync_module_buffers(authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1625, in _sync_module_buffers
+    self._default_broadcast_coalesced(authoritative_rank=authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1646, in _default_broadcast_coalesced
+    self._distributed_broadcast_coalesced(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1562, in _distributed_broadcast_coalesced
+    dist._broadcast_coalesced(
+RuntimeError: NCCL communicator was aborted on rank 51. Original reason for failure was: [Rank 51] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 49. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-1:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 32] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 49. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 567, in train_one_epoch
+    retval = model(**batch)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
+    return forward_call(*input, **kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1034, in forward
+    self._sync_buffers()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1621, in _sync_buffers
+    self._sync_module_buffers(authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1625, in _sync_module_buffers
+    self._default_broadcast_coalesced(authoritative_rank=authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1646, in _default_broadcast_coalesced
+    self._distributed_broadcast_coalesced(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1562, in _distributed_broadcast_coalesced
+    dist._broadcast_coalesced(
+RuntimeError: NCCL communicator was aborted on rank 50. Original reason for failure was: [Rank 50] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 49. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub081:2742227:2742227 [0] NCCL INFO comm 0x518b4950 rank 56 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+[W ProcessGroupNCCL.cpp:948] [Rank 55] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 52. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 54] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 52. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 53] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 52. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 33] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 49. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub080:4113206:4113298 [3] NCCL INFO [Service thread] Connection closed by localRank 3
+gpub080:4113204:4113297 [1] NCCL INFO [Service thread] Connection closed by localRank 1
+gpub080:4113205:4113296 [2] NCCL INFO [Service thread] Connection closed by localRank 2
+gpub080:4113206:4113224 [0] NCCL INFO comm 0x8c72c2a0 rank 55 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpub080:4113205:4113226 [0] NCCL INFO comm 0x50af0e00 rank 54 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpub080:4113204:4113225 [0] NCCL INFO comm 0xb71b4bf0 rank 53 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 57] Caught collective operation timeout: WorkNCCL(SeqNum=3865712, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800134 milliseconds before timing out.
+gpub078:4170389:4170389 [0] NCCL INFO comm 0x4f656710 rank 44 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 567, in train_one_epoch
+    retval = model(**batch)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
+    return forward_call(*input, **kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1034, in forward
+    self._sync_buffers()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1621, in _sync_buffers
+    self._sync_module_buffers(authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1625, in _sync_module_buffers
+    self._default_broadcast_coalesced(authoritative_rank=authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1646, in _default_broadcast_coalesced
+    self._distributed_broadcast_coalesced(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1562, in _distributed_broadcast_coalesced
+    dist._broadcast_coalesced(
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 55] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 52. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-1:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 56] Caught collective operation timeout: WorkNCCL(SeqNum=3865712, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800687 milliseconds before timing out.
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 567, in train_one_epoch
+    retval = model(**batch)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
+    return forward_call(*input, **kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1034, in forward
+    self._sync_buffers()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1621, in _sync_buffers
+    self._sync_module_buffers(authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1625, in _sync_module_buffers
+    self._default_broadcast_coalesced(authoritative_rank=authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1646, in _default_broadcast_coalesced
+    self._distributed_broadcast_coalesced(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1562, in _distributed_broadcast_coalesced
+    dist._broadcast_coalesced(
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 54] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 52. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-1:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 44] Caught collective operation timeout: WorkNCCL(SeqNum=3865712, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800885 milliseconds before timing out.
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 567, in train_one_epoch
+    retval = model(**batch)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
+    return forward_call(*input, **kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1034, in forward
+    self._sync_buffers()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1621, in _sync_buffers
+    self._sync_module_buffers(authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1625, in _sync_module_buffers
+    self._default_broadcast_coalesced(authoritative_rank=authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1646, in _default_broadcast_coalesced
+    self._distributed_broadcast_coalesced(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1562, in _distributed_broadcast_coalesced
+    dist._broadcast_coalesced(
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 53] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 52. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 61] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 44. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 60] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 44. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 63] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 44. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 62] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 44. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub015:879780:879780 [0] NCCL INFO comm 0x51871d20 rank 0 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+[W ProcessGroupNCCL.cpp:948] [Rank 24] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 0. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 27] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 0. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 26] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 0. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 25] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 0. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub082:1518446:1518535 [1] NCCL INFO [Service thread] Connection closed by localRank 1
+gpub082:1518446:1518467 [0] NCCL INFO comm 0xb6caaae0 rank 61 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpub082:1518447:1518469 [0] NCCL INFO comm 0xb6376a90 rank 62 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpub082:1518448:1518468 [0] NCCL INFO comm 0x8c5b6f90 rank 63 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpub049:4064877:4064950 [3] NCCL INFO [Service thread] Connection closed by localRank 3
+gpub049:4064875:4064949 [1] NCCL INFO [Service thread] Connection closed by localRank 1
+gpub049:4064877:4064897 [0] NCCL INFO comm 0x4f5c00a0 rank 27 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpub049:4064875:4064894 [0] NCCL INFO comm 0xa8769be0 rank 25 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpub049:4064876:4064896 [0] NCCL INFO comm 0xb89777d0 rank 26 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpub049:4064874:4064895 [0] NCCL INFO comm 0x500f4c60 rank 24 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+[W ProcessGroupNCCL.cpp:948] [Rank 59] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 0. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 58] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 57. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub081:2742229:2742324 [2] NCCL INFO [Service thread] Connection closed by localRank 2
+gpub081:2742230:2742253 [0] NCCL INFO comm 0xba992be0 rank 59 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpub081:2742229:2742252 [0] NCCL INFO comm 0x50f92c00 rank 58 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 61] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 44. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub082:1518445:1518470 [0] NCCL INFO comm 0x519aa9d0 rank 60 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+Process SpawnProcess-1:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 0] Caught collective operation timeout: WorkNCCL(SeqNum=3865712, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1801161 milliseconds before timing out.
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 25] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 0. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-1:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 24] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 0. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-3:
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 27] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 0. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 26] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 0. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 567, in train_one_epoch
+    retval = model(**batch)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
+    return forward_call(*input, **kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1034, in forward
+    self._sync_buffers()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1621, in _sync_buffers
+    self._sync_module_buffers(authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1625, in _sync_module_buffers
+    self._default_broadcast_coalesced(authoritative_rank=authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1646, in _default_broadcast_coalesced
+    self._distributed_broadcast_coalesced(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1562, in _distributed_broadcast_coalesced
+    dist._broadcast_coalesced(
+RuntimeError: NCCL communicator was aborted on rank 62. Original reason for failure was: [Rank 62] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 44. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 567, in train_one_epoch
+    retval = model(**batch)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
+    return forward_call(*input, **kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1034, in forward
+    self._sync_buffers()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1621, in _sync_buffers
+    self._sync_module_buffers(authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1625, in _sync_module_buffers
+    self._default_broadcast_coalesced(authoritative_rank=authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1646, in _default_broadcast_coalesced
+    self._distributed_broadcast_coalesced(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1562, in _distributed_broadcast_coalesced
+    dist._broadcast_coalesced(
+RuntimeError: NCCL communicator was aborted on rank 63. Original reason for failure was: [Rank 63] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 44. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-1:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 60] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 44. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 567, in train_one_epoch
+    retval = model(**batch)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
+    return forward_call(*input, **kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1034, in forward
+    self._sync_buffers()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1621, in _sync_buffers
+    self._sync_module_buffers(authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1625, in _sync_module_buffers
+    self._default_broadcast_coalesced(authoritative_rank=authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1646, in _default_broadcast_coalesced
+    self._distributed_broadcast_coalesced(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1562, in _distributed_broadcast_coalesced
+    dist._broadcast_coalesced(
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 59] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 0. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 567, in train_one_epoch
+    retval = model(**batch)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
+    return forward_call(*input, **kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1034, in forward
+    self._sync_buffers()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1621, in _sync_buffers
+    self._sync_module_buffers(authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1625, in _sync_module_buffers
+    self._default_broadcast_coalesced(authoritative_rank=authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1646, in _default_broadcast_coalesced
+    self._distributed_broadcast_coalesced(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1562, in _distributed_broadcast_coalesced
+    dist._broadcast_coalesced(
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 58] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 57. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 7] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 57. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 9] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 57. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 10] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 57. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub031:1921205:1921294 [1] NCCL INFO [Service thread] Connection closed by localRank 1
+[W ProcessGroupNCCL.cpp:948] [Rank 8] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 57. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub026:2433087:2433174 [3] NCCL INFO [Service thread] Connection closed by localRank 3
+gpub026:2433087:2433107 [0] NCCL INFO comm 0x50347080 rank 7 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpub031:1921205:1921228 [0] NCCL INFO comm 0x92a3a80 rank 9 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpub031:1921206:1921295 [2] NCCL INFO [Service thread] Connection closed by localRank 2
+gpub031:1921206:1921230 [0] NCCL INFO comm 0xc2e65190 rank 10 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpub053:1664487:1664568 [1] NCCL INFO [Service thread] Connection closed by localRank 1
+gpub053:1664487:1664487 [1] NCCL INFO comm 0x506110d0 rank 41 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpub031:1921204:1921227 [0] NCCL INFO comm 0xb63f1750 rank 8 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 9] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 57. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 567, in train_one_epoch
+    retval = model(**batch)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
+    return forward_call(*input, **kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1034, in forward
+    self._sync_buffers()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1621, in _sync_buffers
+    self._sync_module_buffers(authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1625, in _sync_module_buffers
+    self._default_broadcast_coalesced(authoritative_rank=authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1646, in _default_broadcast_coalesced
+    self._distributed_broadcast_coalesced(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1562, in _distributed_broadcast_coalesced
+    dist._broadcast_coalesced(
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 7] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 57. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 10] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 57. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-1:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 8] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 57. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 17] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 41. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 18] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 41. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub036:1870498:1870586 [2] NCCL INFO [Service thread] Connection closed by localRank 2
+gpub036:1870498:1870518 [0] NCCL INFO comm 0x50c66a10 rank 18 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+[W ProcessGroupNCCL.cpp:948] [Rank 16] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 41. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 19] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 41. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub036:1870499:1870587 [3] NCCL INFO [Service thread] Connection closed by localRank 3
+gpub036:1870499:1870519 [0] NCCL INFO comm 0xa269c50 rank 19 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpub036:1870497:1870521 [0] NCCL INFO comm 0x4fcaadc0 rank 17 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 41] Caught collective operation timeout: WorkNCCL(SeqNum=3865712, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800142 milliseconds before timing out.
+[W ProcessGroupNCCL.cpp:948] [Rank 2] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 41. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub015:879782:879859 [2] NCCL INFO [Service thread] Connection closed by localRank 2
+gpub015:879782:879802 [0] NCCL INFO comm 0x502ad7c0 rank 2 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 17] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 41. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 42] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 41. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 40] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 41. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 43] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 41. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 46] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 41. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 45] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 41. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub053:1664489:1664567 [3] NCCL INFO [Service thread] Connection closed by localRank 3
+gpub053:1664488:1664566 [2] NCCL INFO [Service thread] Connection closed by localRank 2
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 18] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 41. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub053:1664489:1664509 [0] NCCL INFO comm 0xa9e28fe0 rank 43 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpub036:1870496:1870520 [0] NCCL INFO comm 0xad17bd0 rank 16 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+gpub053:1664488:1664511 [0] NCCL INFO comm 0xe3027a0 rank 42 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpub078:4170391:4170477 [2] NCCL INFO [Service thread] Connection closed by localRank 2
+gpub078:4170390:4170479 [1] NCCL INFO [Service thread] Connection closed by localRank 1
+gpub078:4170390:4170413 [0] NCCL INFO comm 0x1d97f440 rank 45 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpub078:4170391:4170416 [0] NCCL INFO comm 0x5187a990 rank 46 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 567, in train_one_epoch
+    retval = model(**batch)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
+    return forward_call(*input, **kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1034, in forward
+    self._sync_buffers()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1621, in _sync_buffers
+    self._sync_module_buffers(authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1625, in _sync_module_buffers
+    self._default_broadcast_coalesced(authoritative_rank=authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1646, in _default_broadcast_coalesced
+    self._distributed_broadcast_coalesced(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1562, in _distributed_broadcast_coalesced
+    dist._broadcast_coalesced(
+RuntimeError: NCCL communicator was aborted on rank 19. Original reason for failure was: [Rank 19] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 41. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 567, in train_one_epoch
+    retval = model(**batch)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
+    return forward_call(*input, **kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1034, in forward
+    self._sync_buffers()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1621, in _sync_buffers
+    self._sync_module_buffers(authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1625, in _sync_module_buffers
+    self._default_broadcast_coalesced(authoritative_rank=authoritative_rank)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1646, in _default_broadcast_coalesced
+    self._distributed_broadcast_coalesced(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1562, in _distributed_broadcast_coalesced
+    dist._broadcast_coalesced(
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 2] Found key in
store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 41. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-1: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 16] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 41. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +gpub053:1664486:1664508 [0] NCCL INFO comm 0x4f7ecd60 rank 40 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 567, in train_one_epoch + retval = model(**batch) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl + return forward_call(*input, **kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1034, in forward + self._sync_buffers() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1621, in _sync_buffers + self._sync_module_buffers(authoritative_rank) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1625, in _sync_module_buffers + self._default_broadcast_coalesced(authoritative_rank=authoritative_rank) + File 
"/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1646, in _default_broadcast_coalesced + self._distributed_broadcast_coalesced( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1562, in _distributed_broadcast_coalesced + dist._broadcast_coalesced( +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 42] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 41. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 567, in train_one_epoch + retval = model(**batch) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl + return forward_call(*input, **kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1034, in forward + self._sync_buffers() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1621, in _sync_buffers + self._sync_module_buffers(authoritative_rank) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1625, in _sync_module_buffers + self._default_broadcast_coalesced(authoritative_rank=authoritative_rank) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1646, in _default_broadcast_coalesced + self._distributed_broadcast_coalesced( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1562, in _distributed_broadcast_coalesced + dist._broadcast_coalesced( +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 46] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 41. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 567, in train_one_epoch + retval = model(**batch) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl + return forward_call(*input, **kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1034, in forward + self._sync_buffers() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1621, in _sync_buffers + self._sync_module_buffers(authoritative_rank) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1625, in _sync_module_buffers + self._default_broadcast_coalesced(authoritative_rank=authoritative_rank) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1646, in _default_broadcast_coalesced + self._distributed_broadcast_coalesced( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1562, in _distributed_broadcast_coalesced + dist._broadcast_coalesced( +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 45] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 41. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-4: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 567, in train_one_epoch + retval = model(**batch) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl + return forward_call(*input, **kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1034, in forward + self._sync_buffers() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1621, in _sync_buffers + self._sync_module_buffers(authoritative_rank) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1625, in _sync_module_buffers + self._default_broadcast_coalesced(authoritative_rank=authoritative_rank) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1646, in _default_broadcast_coalesced + self._distributed_broadcast_coalesced( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1562, in _distributed_broadcast_coalesced + dist._broadcast_coalesced( +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 43] Found key in store: NCCLABORTEDCOMM:20b41dac1c1773000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 41. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
+Process SpawnProcess-1:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 40] Found key in store: NCCLABORTEDCOMM:20b41dac1c17730000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 41. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 3 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 0 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 3 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 0 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 0 terminated with exit code 1
+srun: error: gpub052: task 9: Exited with exit code 1
+srun: error: gpub026: task 1: Exited with exit code 1
+srun: error: gpub080: task 13: Exited with exit code 1
+srun: error: gpub079: task 12: Exited with exit code 1
+srun: error: gpub015: task 0: Exited with exit code 1
+srun: error: gpub081: task 14: Exited with exit code 1
+srun: error: gpub082: task 15: Exited with exit code 1
+srun: error: gpub032: task 3: Exited with exit code 1
+srun: error: gpub050: task 7: Exited with exit code 1
+srun: error: gpub037: task 5: Exited with exit code 1
+srun: error: gpub049: task 6: Exited with exit code 1
+srun: error: gpub078: task 11: Exited with exit code 1
+srun: error: gpub051: task 8: Exited with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 0 terminated with exit code 1
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+srun: error: gpub036: task 4: Exited with exit code 1
+srun: error: gpub053: task 10: Exited with exit code 1
+srun: Job step aborted: Waiting up to 32 seconds for job step to finish.
diff --git a/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.9.log b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.9.log
new file mode 100644
index 0000000000000000000000000000000000000000..1e9a300c024da9605a228fbeaf2681e3f144b313
--- /dev/null
+++ b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.9.log
@@ -0,0 +1,4247 @@
+# Running on gpub001.delta.ncsa.illinois.edu
+# Started at Mon Jul 3 22:24:10 CDT 2023
+# SLURMD_NODENAME=gpub001
+# SLURM_CLUSTER_NAME=delta
+# SLURM_CONF=/var/spool/slurmd/conf-cache/slurm.conf
+# SLURM_CPUS_ON_NODE=64
+# SLURM_CPUS_PER_TASK=64
+# SLURM_EXPORT_ENV=PATH
+# SLURM_GET_USER_ENV=1
+# SLURM_GPUS_ON_NODE=4
+# SLURM_GTIDS=0
+# SLURM_JOBID=2121665
+# SLURM_JOB_ACCOUNT=bbjs-delta-gpu
+# SLURM_JOB_CPUS_PER_NODE='64(x16)'
+# SLURM_JOB_GID=202
+# SLURM_JOB_GPUS=0,1,2,3
+# SLURM_JOB_ID=2121665
+# SLURM_JOB_NAME=exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.log
+# SLURM_JOB_NODELIST='gpub[001-002,015-016,022,030-032,059-060,066-067,076-077,079,096]'
+# SLURM_JOB_NUM_NODES=16
+# SLURM_JOB_PARTITION=gpuA40x4
+# SLURM_JOB_QOS=bbjs-delta-gpu
+# SLURM_JOB_UID=68077
+# SLURM_JOB_USER=peng6
+# SLURM_LOCALID=0
+# SLURM_MEM_PER_NODE=240000
+# SLURM_NNODES=16
+# SLURM_NODEID=0
+# SLURM_NODELIST='gpub[001-002,015-016,022,030-032,059-060,066-067,076-077,079,096]'
+# SLURM_NODE_ALIASES='(null)'
+# SLURM_OPEN_MODE=a
+# SLURM_PRIO_PROCESS=0
+# SLURM_PROCID=0
+# SLURM_SUBMIT_DIR=/scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1
+# SLURM_SUBMIT_HOST=dt-login02.delta.internal.ncsa.edu
+# SLURM_TASKS_PER_NODE='1(x16)'
+# SLURM_TASK_PID=383686
+# SLURM_TOPOLOGY_ADDR=ss00.ss09.gpub001
+# SLURM_TOPOLOGY_ADDR_PATTERN=switch.switch.node
+# SLURM_WORKING_CLUSTER=delta:dt-sched:6817:9728:109
+# srun --export=ALL python3 -m espnet2.bin.s2t_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_cc45b274-aa68-4d2c-943c-66da258b53f0
--train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_cc45b274-aa68-4d2c-943c-66da258b53f0 +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config 
conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 
/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir 
exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/speech_shape --fol/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/speech_shape 
+/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits10/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits10/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed true --dist_launcher slurm --dist_init_method file:///scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/.dist_init_cc45b274-aa68-4d2c-943c-66da258b53f0
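Each --train_data_path_and_name_and_type value in the command above is a comma-separated path,name,type triple (e.g. exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark), and each occurrence is paired positionally with a --train_shape_file. Below is a minimal sketch of how such repeated triples can be collected and split; it is an illustration only, not ESPnet's actual argument parser.

    import argparse

    # Illustration only: mimics the repeated path,name,type flags seen in this log.
    parser = argparse.ArgumentParser()
    parser.add_argument("--train_data_path_and_name_and_type", action="append", default=[])
    parser.add_argument("--train_shape_file", action="append", default=[])
    args = parser.parse_args([
        "--train_data_path_and_name_and_type", "exp/s2t_stats_raw_bpe50000/splits10/wav.scp,speech,kaldi_ark",
        "--train_shape_file", "exp/s2t_stats_raw_bpe50000/splits10/speech_shape",
    ])
    for triple in args.train_data_path_and_name_and_type:
        # e.g. ("exp/.../wav.scp", "speech", "kaldi_ark"): file path, dataset field name, loader type
        path, name, dtype = triple.split(",")
        print(f"dataset field {name!r}: type={dtype}, source={path}")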
+[gpub001:0/64] 2023-07-03 22:27:37,296 (distributed_c10d:319) INFO: Added key: store_based_barrier_key:1 to store for rank: 0
+[gpub001:0/64] 2023-07-03 22:27:37,982 (distributed_c10d:353) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 64 nodes.
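The store-based barrier above completes once all 64 ranks (16 nodes x 4 GPUs, per --ngpu 4) have joined the file-backed store named by --dist_init_method. A minimal sketch of the equivalent raw PyTorch call follows; the rank, world size, and shortened file path are illustrative assumptions, since espnet2 derives the real values from the SLURM environment, and NCCL is assumed as the usual multi-GPU backend.

    import torch.distributed as dist

    # Sketch of the file-based rendezvous this run uses (the full file:// path
    # appears in the command line above). RANK/WORLD_SIZE are illustrative;
    # the real job computes them from SLURM variables.
    dist.init_process_group(
        backend="nccl",                                   # assumed; typical for GPU training
        init_method="file:///path/to/exp_dir/.dist_init_cc45b274-aa68-4d2c-943c-66da258b53f0",
        world_size=64,                                    # 16 nodes x 4 GPUs, matching the log
        rank=0,                                           # this process's global rank
    )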
+[gpub001:0/64] 2023-07-03 22:27:38,012 (s2t:483) INFO: Vocabulary size: 50002 +[gpub001:0/64] 2023-07-03 22:27:54,659 (abs_task:1201) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True +[gpub001:0/64] 2023-07-03 22:27:54,668 (abs_task:1202) INFO: Model structure: +ESPnetS2TModel( + (frontend): DefaultFrontend( + (stft): Stft(n_fft=512, win_length=400, hop_length=160, center=True, normalized=False, onesided=True) + (frontend): Frontend() + (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False) + ) + (specaug): SpecAug( + (freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq) + (time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=10, axis=time) + ) + (normalize): GlobalMVN(stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz, norm_means=True, norm_vars=True) + (encoder): TransformerEncoder( + (embed): Conv2dSubsampling( + (conv): Sequential( + (0): Conv2d(1, 1024, kernel_size=(3, 3), stride=(2, 2)) + (1): ReLU() + (2): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(2, 2)) + (3): ReLU() + ) + (out): Sequential( + (0): Linear(in_features=19456, out_features=1024, bias=True) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + ) + (encoders): MultiSequential( + (0): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, 
inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, 
bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (9): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (10): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (11): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): 
Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (12): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (13): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (14): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (15): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): 
Dropout(p=0.1, inplace=False) + ) + (16): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (17): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (18): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (19): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (20): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): 
Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (21): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (22): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (23): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + ) + (decoder): TransformerDecoder( + (embed): Sequential( + (0): Embedding(50002, 1024) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (output_layer): Linear(in_features=1024, out_features=50002, bias=True) + (decoders): MultiSequential( + (0): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + 
(linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): DecoderLayer( + (self_attn): MultiHeadedAttention( + 
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), 
eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): 
LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (9): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (10): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (11): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (12): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (13): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (14): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (15): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (16): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (17): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (18): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (19): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (20): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (21): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (22): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (23): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+    )
+  )
+  (criterion_att): LabelSmoothingLoss(
+    (criterion): KLDivLoss()
+  )
+  (ctc): CTC(
+    (ctc_lo): Linear(in_features=1024, out_features=50002, bias=True)
+    (ctc_loss): CTCLoss()
+  )
+)
+
+Model summary:
+    Class Name: ESPnetS2TModel
+    Total Number of model parameters: 888.51 M
+    Number of trainable parameters: 888.51 M (100.0%)
+    Size: 3.55 GB
+    Type: torch.float32
+[gpub001:0/64] 2023-07-03 22:27:54,668 (abs_task:1205) INFO: Optimizer:
+AdamW (
+Parameter Group 0
+    amsgrad: False
+    betas: [0.9, 0.98]
+    capturable: False
+    eps: 1e-06
+    foreach: None
+    initial_lr: 0.00025
+    lr: 2.5e-08
+    maximize: False
+    weight_decay: 0.0
+)
+[gpub001:0/64] 2023-07-03 22:27:54,669 (abs_task:1206) INFO: Scheduler: WarmupLR(warmup_steps=10000)
+[gpub001:0/64] 2023-07-03 22:27:54,674 (abs_task:1215) INFO: Saving the configuration in exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/config.yaml
+[gpub001:0/64] 2023-07-03 22:27:55,384 (abs_task:1272) INFO: Loading pretrained params from /scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v2/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e18_d18_lr5e-4_warmup20k_raw_bpe50000/valid.acc.ave.pth
+[gpub001:0/64] 2023-07-03 22:28:02,397 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-03 22:28:02,589 (abs_task:1570) INFO: [valid] dataset:
+ESPnetDataset(
+  speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"}
+  text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"}
+  text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"}
+  text: {"path": "dump/raw/dev/text", "type": "text"}
+  preprocess: )
+[gpub001:0/64] 2023-07-03 22:28:02,589 (abs_task:1571) INFO: [valid] Batch sampler: UnsortedBatchSampler(N-batch=1012, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape,
+[gpub001:0/64] 2023-07-03 22:28:02,599 (abs_task:1572) INFO: [valid] mini-batch sizes summary: N-batch=1012, mean=128.1, min=128, max=129
+[gpub001:0/64] 2023-07-03 22:28:03,079 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-03 22:28:03,400 (abs_task:1570) INFO: [plot_att] dataset:
+ESPnetDataset(
+  speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"}
+  text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"}
+  text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"}
+  text: {"path": "dump/raw/dev/text", "type": "text"}
+  preprocess: )
+[gpub001:0/64] 2023-07-03 22:28:03,400 (abs_task:1571) INFO: [plot_att] Batch sampler: UnsortedBatchSampler(N-batch=129591, batch_size=1, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape,
+[gpub001:0/64] 2023-07-03 22:28:03,400 (abs_task:1572) INFO: [plot_att] mini-batch sizes summary: N-batch=3, mean=1.0, min=1, max=1
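Two of the numbers above can be cross-checked directly: the reported size follows from 888.51 M float32 parameters, and the optimizer's current lr follows from the warmup schedule. The sketch below assumes ESPnet's Noam-style WarmupLR formula, lr = base_lr * warmup_steps^0.5 * min(step^-0.5, step * warmup_steps^-1.5); it is an illustration, not part of the log.

    # Sanity checks for the model summary and optimizer state above
    # (a sketch; the WarmupLR formula is an assumption taken from
    # ESPnet's WarmupLR scheduler, not something printed in this log).
    n_params = 888.51e6                      # total parameters reported
    print(f"{n_params * 4 / 1e9:.2f} GB")    # float32 = 4 bytes -> "3.55 GB"

    base_lr, warmup_steps = 2.5e-4, 10000
    def warmup_lr(step):
        return base_lr * warmup_steps**0.5 * min(step**-0.5, step * warmup_steps**-1.5)
    print(warmup_lr(1))             # 2.5e-08, the "lr" shown in Parameter Group 0
    print(warmup_lr(warmup_steps))  # peaks at 2.5e-04 (= initial_lr) at step 10000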
+[gpub001:0/64] 2023-07-03 22:28:31,184 (trainer:159) INFO: The training was resumed using exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/checkpoint.pth
+gpub001:383774:383774 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.101<0>
+gpub001:383774:383774 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub001:383774:383774 [0] NCCL INFO cudaDriverVersion 12010
+NCCL version 2.14.3+cuda11.7
+[gpub001:0/64] 2023-07-03 22:28:36,765 (trainer:284) INFO: 9/100epoch started
+[gpub001:0/64] 2023-07-03 22:28:36,808 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub001:0/64] 2023-07-03 22:28:58,125 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-03 22:29:02,284 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.3", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.3", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.3", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.3", "type": "text"}
+  preprocess: )
+[gpub001:0/64] 2023-07-03 22:29:02,284 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.3,
+[gpub001:0/64] 2023-07-03 22:29:02,292 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+gpub022:3399536:3399536 [2] NCCL INFO cudaDriverVersion 12010
+gpub022:3399536:3399536 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.122<0>
+gpub022:3399536:3399536 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub022:3399536:3399614 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.122<0>
+gpub022:3399536:3399614 [2] NCCL INFO Using network IB
+gpub022:3399536:3399614 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub022:3399536:3399614 [2] NCCL INFO Trees [0] 19/-1/-1->18->17 [1] 19/-1/-1->18->17
+gpub022:3399536:3399614 [2] NCCL INFO Channel 00/0 : 18[85000] -> 19[c7000] via P2P/IPC
+gpub022:3399536:3399614 [2] NCCL INFO Channel 01/0 : 18[85000] -> 19[c7000] via P2P/IPC
+gpub022:3399536:3399614 [2] NCCL INFO Connected all rings
+gpub022:3399536:3399614 [2] NCCL INFO Channel 00/0 : 18[85000] -> 17[46000] via P2P/IPC
+gpub022:3399536:3399614 [2] NCCL INFO Channel 01/0 : 18[85000] -> 17[46000] via P2P/IPC
+gpub022:3399536:3399614 [2] NCCL INFO Connected all trees
+gpub022:3399536:3399614 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub022:3399536:3399614 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub022:3399536:3399614 [2] NCCL INFO comm 0x93f2210 rank 18 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub022:3399535:3399535 [1] NCCL INFO cudaDriverVersion 12010
+gpub022:3399535:3399535 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.122<0>
+gpub022:3399535:3399535 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub022:3399535:3399615 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.122<0>
+gpub022:3399535:3399615 [1] NCCL INFO Using network IB
+gpub022:3399535:3399615 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub022:3399535:3399615 [1] NCCL INFO Trees [0] 18/8/-1->17->16 [1] 18/-1/-1->17->16
+gpub022:3399535:3399615 [1] NCCL INFO Channel 00/0 : 17[46000] -> 18[85000] via P2P/IPC
+gpub022:3399535:3399615 [1] NCCL INFO Channel 01/0 : 17[46000] -> 18[85000] via P2P/IPC
+gpub022:3399535:3399615 [1] NCCL INFO Connected all rings
+gpub022:3399535:3399615 [1] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [receive] via NET/IB/0
+gpub022:3399535:3399615 [1] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [send] via NET/IB/0
+gpub002:1756559:1756559 [0] NCCL INFO cudaDriverVersion 12010
+gpub002:1756559:1756559 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.102<0>
+gpub002:1756559:1756559 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub002:1756559:1756638 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.102<0>
+gpub002:1756559:1756638 [0] NCCL INFO Using network IB
+gpub002:1756559:1756638 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub002:1756559:1756638 [0] NCCL INFO Trees [0] 5/-1/-1->4->9 [1] 5/0/-1->4->12
+gpub002:1756559:1756638 [0] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0
+gpub002:1756559:1756638 [0] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [receive] via NET/IB/0
+gpub002:1756559:1756638 [0] NCCL INFO Channel 00/0 : 4[7000] -> 5[46000] via P2P/IPC
+gpub002:1756559:1756638 [0] NCCL INFO Channel 01/0 : 4[7000] -> 5[46000] via P2P/IPC
+gpub002:1756559:1756638 [0] NCCL INFO Connected all rings
+gpub060:1938145:1938145 [2] NCCL INFO cudaDriverVersion 12010
+gpub060:1938145:1938145 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.160<0>
+gpub060:1938145:1938145 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub060:1938145:1938218 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.160<0>
+gpub060:1938145:1938218 [2] NCCL INFO Using network IB
+gpub060:1938145:1938218 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub060:1938145:1938218 [2] NCCL INFO Trees [0] 39/-1/-1->38->37 [1] 39/-1/-1->38->37
+gpub060:1938145:1938218 [2] NCCL INFO Channel 00/0 : 38[85000] -> 39[c7000] via P2P/IPC
+gpub060:1938145:1938218 [2] NCCL INFO Channel 01/0 : 38[85000] -> 39[c7000] via P2P/IPC
+gpub060:1938145:1938218 [2] NCCL INFO Connected all rings
+gpub060:1938145:1938218 [2] NCCL INFO Channel 00/0 : 38[85000] -> 37[46000] via P2P/IPC
+gpub060:1938145:1938218 [2] NCCL INFO Channel 01/0 : 38[85000] -> 37[46000] via P2P/IPC
+gpub022:3399535:3399615 [1] NCCL INFO Channel 00/0 : 17[46000] -> 16[7000] via P2P/IPC
+gpub022:3399535:3399615 [1] NCCL INFO Channel 01/0 : 17[46000] -> 16[7000] via P2P/IPC
+gpub022:3399535:3399615 [1] NCCL INFO Connected all trees
+gpub022:3399535:3399615 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub022:3399535:3399615 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub022:3399535:3399615 [1] NCCL INFO comm 0x4fa312f0 rank 17 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub002:1756559:1756638 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [receive] via NET/IB/0
+gpub002:1756559:1756638 [0] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [send] via NET/IB/0
+gpub002:1756559:1756638 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [send] via NET/IB/0
+gpub002:1756559:1756638 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [receive] via NET/IB/0
+gpub002:1756559:1756638 [0] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [receive] via NET/IB/0
+gpub002:1756559:1756638 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [send] via NET/IB/0
+gpub002:1756559:1756638 [0] NCCL INFO Connected all trees
+gpub002:1756559:1756638 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub002:1756559:1756638 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub002:1756559:1756638 [0] NCCL INFO comm 0x51930090 rank 4 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub060:1938145:1938218 [2] NCCL INFO Connected all trees
+gpub060:1938145:1938218 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub060:1938145:1938218 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub060:1938145:1938218 [2] NCCL INFO comm 0xb591e2d0 rank 38 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub022:3399534:3399534 [0] NCCL INFO cudaDriverVersion 12010
+gpub022:3399534:3399534 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.122<0>
+gpub022:3399534:3399534 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub022:3399534:3399616 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.122<0>
+gpub022:3399534:3399616 [0] NCCL INFO Using network IB
+gpub022:3399534:3399616 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub022:3399534:3399616 [0] NCCL INFO Trees [0] 17/24/-1->16->33 [1] 17/-1/-1->16->20
+gpub022:3399534:3399616 [0] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0
+gpub022:3399534:3399616 [0] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [receive] via NET/IB/0
+gpub022:3399534:3399616 [0] NCCL INFO Channel 00/0 : 16[7000] -> 17[46000] via P2P/IPC
+gpub022:3399534:3399616 [0] NCCL INFO Channel 01/0 : 16[7000] -> 17[46000] via P2P/IPC
+gpub022:3399534:3399616 [0] NCCL INFO Connected all rings
+gpub002:1756561:1756561 [2] NCCL INFO cudaDriverVersion 12010
+gpub002:1756561:1756561 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.102<0>
+gpub002:1756561:1756561 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub002:1756561:1756637 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.102<0>
+gpub002:1756561:1756637 [2] NCCL INFO Using network IB
+gpub002:1756561:1756637 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub002:1756561:1756637 [2] NCCL INFO Trees [0] 7/-1/-1->6->5 [1] 7/-1/-1->6->5
+gpub002:1756561:1756637 [2] NCCL INFO Channel 00/0 : 6[85000] -> 7[c7000] via P2P/IPC
+gpub002:1756561:1756637 [2] NCCL INFO Channel 01/0 : 6[85000] -> 7[c7000] via P2P/IPC
+gpub002:1756561:1756637 [2] NCCL INFO Connected all rings
+gpub002:1756561:1756637 [2] NCCL INFO Channel 00/0 : 6[85000] -> 5[46000] via P2P/IPC
+gpub002:1756561:1756637 [2] NCCL INFO Channel 01/0 : 6[85000] -> 5[46000] via P2P/IPC
+gpub022:3399534:3399616 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [send] via NET/IB/0
+gpub022:3399534:3399616 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [send] via NET/IB/0
+gpub022:3399534:3399616 [0] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [send] via NET/IB/0
+gpub022:3399534:3399616 [0] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [receive] via NET/IB/0
+gpub022:3399534:3399616 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [receive] via NET/IB/0
+gpub022:3399534:3399616 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [receive] via NET/IB/0
+gpub022:3399534:3399616 [0] NCCL INFO Connected all trees
+gpub022:3399534:3399616 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub022:3399534:3399616 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub022:3399534:3399616 [0] NCCL INFO comm 0x50711f50 rank 16 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub002:1756561:1756637 [2] NCCL INFO Connected all trees
+gpub002:1756561:1756637 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub002:1756561:1756637 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub002:1756561:1756637 [2] NCCL INFO comm 0x51ad54d0 rank 6 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub002:1756562:1756562 [3] NCCL INFO cudaDriverVersion 12010
+gpub002:1756562:1756562 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.102<0>
+gpub002:1756562:1756562 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub002:1756562:1756636 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.102<0>
+gpub002:1756562:1756636 [3] NCCL INFO Using network IB
+gpub002:1756562:1756636 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub002:1756562:1756636 [3] NCCL INFO Trees [0] -1/-1/-1->7->6 [1] -1/-1/-1->7->6
+gpub002:1756562:1756636 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0
+gpub002:1756562:1756636 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [send] via NET/IB/0
+gpub002:1756562:1756636 [3] NCCL INFO Connected all rings
+gpub002:1756562:1756636 [3] NCCL INFO Channel 00/0 : 7[c7000] -> 6[85000] via P2P/IPC
+gpub002:1756562:1756636 [3] NCCL INFO Channel 01/0 : 7[c7000] -> 6[85000] via P2P/IPC
+gpub002:1756562:1756636 [3] NCCL INFO Connected all trees
+gpub002:1756562:1756636 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub002:1756562:1756636 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub002:1756562:1756636 [3] NCCL INFO comm 0x9ca8ab90 rank 7 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub002:1756560:1756560 [1] NCCL INFO cudaDriverVersion 12010
+gpub002:1756560:1756560 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.102<0>
+gpub002:1756560:1756560 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub002:1756560:1756635 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.102<0>
+gpub002:1756560:1756635 [1] NCCL INFO Using network IB
+gpub002:1756560:1756635 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub002:1756560:1756635 [1] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 6/8/-1->5->4
+gpub002:1756560:1756635 [1] NCCL INFO Channel 00/0 : 5[46000] -> 6[85000] via P2P/IPC
+gpub002:1756560:1756635 [1] NCCL INFO Channel 01/0 : 5[46000] -> 6[85000] via P2P/IPC
+gpub002:1756560:1756635 [1] NCCL INFO Connected all rings
+gpub002:1756560:1756635 [1] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [send] via NET/IB/0
+gpub002:1756560:1756635 [1] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [receive] via NET/IB/0
+gpub002:1756560:1756635 [1] NCCL INFO Channel 00/0 : 5[46000] -> 4[7000] via P2P/IPC
+gpub002:1756560:1756635 [1] NCCL INFO Channel 01/0 : 5[46000] -> 4[7000] via P2P/IPC
+gpub002:1756560:1756635 [1] NCCL INFO Connected all trees
+gpub002:1756560:1756635 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub002:1756560:1756635 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub002:1756560:1756635 [1] NCCL INFO comm 0x17829840 rank 5 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
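The NCCL "Trees" lines in these blocks describe the per-channel tree topology; each field reads child0/child1/child2->rank->parent, with -1 marking an absent link (so rank 18 above has child 19 and parent 17 on both channels). A throwaway parser for eyeballing this while debugging; the helper name is ours and the field layout is assumed from NCCL's log format:

    import re

    # Parse "... NCCL INFO Trees [0] 19/-1/-1->18->17 [1] ..." into
    # {channel: (children, rank, parent)}; a debugging-aid sketch, assuming
    # NCCL's child0/child1/child2->rank->parent layout (hypothetical helper).
    def parse_trees(line):
        out = {}
        for ch, c0, c1, c2, rank, parent in re.findall(
            r"\[(\d+)\] (-?\d+)/(-?\d+)/(-?\d+)->(-?\d+)->(-?\d+)", line
        ):
            children = [int(c) for c in (c0, c1, c2) if int(c) != -1]
            out[int(ch)] = (children, int(rank), int(parent))
        return out

    line = "gpub022:3399536:3399614 [2] NCCL INFO Trees [0] 19/-1/-1->18->17 [1] 19/-1/-1->18->17"
    print(parse_trees(line))  # {0: ([19], 18, 17), 1: ([19], 18, 17)}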
+gpub032:3246893:3246893 [1] NCCL INFO cudaDriverVersion 12010
+gpub032:3246893:3246893 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.132<0>
+gpub032:3246893:3246893 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub032:3246893:3246975 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.132<0>
+gpub032:3246893:3246975 [1] NCCL INFO Using network IB
+gpub032:3246893:3246975 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub032:3246893:3246975 [1] NCCL INFO Trees [0] 30/-1/-1->29->28 [1] 30/44/-1->29->28
+gpub032:3246893:3246975 [1] NCCL INFO Channel 00/0 : 29[46000] -> 30[85000] via P2P/IPC
+gpub032:3246893:3246975 [1] NCCL INFO Channel 01/0 : 29[46000] -> 30[85000] via P2P/IPC
+gpub032:3246893:3246975 [1] NCCL INFO Connected all rings
+gpub032:3246893:3246975 [1] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [send] via NET/IB/0
+gpub032:3246893:3246975 [1] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [receive] via NET/IB/0
+gpub032:3246893:3246975 [1] NCCL INFO Channel 00/0 : 29[46000] -> 28[7000] via P2P/IPC
+gpub032:3246893:3246975 [1] NCCL INFO Channel 01/0 : 29[46000] -> 28[7000] via P2P/IPC
+gpub032:3246893:3246975 [1] NCCL INFO Connected all trees
+gpub032:3246893:3246975 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub032:3246893:3246975 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub032:3246893:3246975 [1] NCCL INFO comm 0x9a6ad00 rank 29 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub076:3343845:3343845 [2] NCCL INFO cudaDriverVersion 12010
+gpub076:3343845:3343845 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.176<0>
+gpub076:3343845:3343845 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub076:3343845:3343918 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.176<0>
+gpub076:3343845:3343918 [2] NCCL INFO Using network IB
+gpub076:3343845:3343918 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub076:3343845:3343918 [2] NCCL INFO Trees [0] 51/-1/-1->50->49 [1] 51/-1/-1->50->49
+gpub076:3343845:3343918 [2] NCCL INFO Channel 00/0 : 50[85000] -> 51[c7000] via P2P/IPC
+gpub076:3343845:3343918 [2] NCCL INFO Channel 01/0 : 50[85000] -> 51[c7000] via P2P/IPC
+gpub076:3343845:3343918 [2] NCCL INFO Connected all rings
+gpub076:3343845:3343918 [2] NCCL INFO Channel 00/0 : 50[85000] -> 49[46000] via P2P/IPC
+gpub076:3343845:3343918 [2] NCCL INFO Channel 01/0 : 50[85000] -> 49[46000] via P2P/IPC
+gpub076:3343845:3343918 [2] NCCL INFO Connected all trees
+gpub076:3343845:3343918 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub076:3343845:3343918 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub076:3343845:3343918 [2] NCCL INFO comm 0x4fe2ad90 rank 50 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub022:3399537:3399537 [3] NCCL INFO cudaDriverVersion 12010
+gpub022:3399537:3399537 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.122<0>
+gpub022:3399537:3399537 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub022:3399537:3399617 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.122<0>
+gpub022:3399537:3399617 [3] NCCL INFO Using network IB
+gpub022:3399537:3399617 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub022:3399537:3399617 [3] NCCL INFO Trees [0] -1/-1/-1->19->18 [1] -1/-1/-1->19->18
+gpub022:3399537:3399617 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0
+gpub022:3399537:3399617 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [send] via NET/IB/0
+gpub022:3399537:3399617 [3] NCCL INFO Connected all rings
+gpub022:3399537:3399617 [3] NCCL INFO Channel 00/0 : 19[c7000] -> 18[85000] via P2P/IPC
+gpub022:3399537:3399617 [3] NCCL INFO Channel 01/0 : 19[c7000] -> 18[85000] via P2P/IPC
+gpub022:3399537:3399617 [3] NCCL INFO Connected all trees
+gpub022:3399537:3399617 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub022:3399537:3399617 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub022:3399537:3399617 [3] NCCL INFO comm 0x50214710 rank 19 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub077:252892:252892 [0] NCCL INFO cudaDriverVersion 12010
+gpub077:252892:252892 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.177<0>
+gpub077:252892:252892 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub077:252892:252962 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.177<0>
+gpub077:252892:252962 [0] NCCL INFO Using network IB
+gpub077:252892:252962 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub077:252892:252962 [0] NCCL INFO Trees [0] 53/-1/-1->52->57 [1] 53/48/-1->52->45
+gpub077:252892:252962 [0] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0
+gpub077:252892:252962 [0] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [receive] via NET/IB/0
+gpub077:252892:252962 [0] NCCL INFO Channel 00/0 : 52[7000] -> 53[46000] via P2P/IPC
+gpub077:252892:252962 [0] NCCL INFO Channel 01/0 : 52[7000] -> 53[46000] via P2P/IPC
+gpub077:252892:252962 [0] NCCL INFO Connected all rings
+gpub077:252892:252962 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [receive] via NET/IB/0
+gpub077:252892:252962 [0] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [send] via NET/IB/0
+gpub077:252892:252962 [0] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [receive] via NET/IB/0
+gpub077:252892:252962 [0] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [send] via NET/IB/0
+gpub077:252892:252962 [0] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [receive] via NET/IB/0
+gpub077:252892:252962 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [send] via NET/IB/0
+gpub077:252892:252962 [0] NCCL INFO Connected all trees
+gpub077:252892:252962 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub077:252892:252962 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub077:252892:252962 [0] NCCL INFO comm 0x97aafd0 rank 52 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub015:828881:828881 [3] NCCL INFO cudaDriverVersion 12010
+gpub015:828881:828881 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.115<0>
+gpub015:828881:828881 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub015:828881:828953 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.115<0>
+gpub015:828881:828953 [3] NCCL INFO Using network IB
+gpub015:828881:828953 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub015:828881:828953 [3] NCCL INFO Trees [0] -1/-1/-1->11->10 [1] -1/-1/-1->11->10
+gpub015:828881:828953 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0
+gpub015:828881:828953 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [send] via NET/IB/0
+gpub015:828881:828953 [3] NCCL INFO Connected all rings
+gpub015:828881:828953 [3] NCCL INFO Channel 00/0 : 11[c7000] -> 10[85000] via P2P/IPC
+gpub015:828881:828953 [3] NCCL INFO Channel 01/0 : 11[c7000] -> 10[85000] via P2P/IPC
+gpub015:828881:828953 [3] NCCL INFO Connected all trees
+gpub015:828881:828953 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub015:828881:828953 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub015:828881:828953 [3] NCCL INFO comm 0xb64dad10 rank 11 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub076:3343846:3343846 [3] NCCL INFO cudaDriverVersion 12010
+gpub076:3343846:3343846 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.176<0>
+gpub076:3343846:3343846 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub076:3343846:3343921 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.176<0>
+gpub076:3343846:3343921 [3] NCCL INFO Using network IB
+gpub076:3343846:3343921 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub076:3343846:3343921 [3] NCCL INFO Trees [0] -1/-1/-1->51->50 [1] -1/-1/-1->51->50
+gpub076:3343846:3343921 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0
+gpub076:3343846:3343921 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 52[7000] [send] via NET/IB/0
+gpub076:3343846:3343921 [3] NCCL INFO Connected all rings
+gpub076:3343846:3343921 [3] NCCL INFO Channel 00/0 : 51[c7000] -> 50[85000] via P2P/IPC
+gpub076:3343846:3343921 [3] NCCL INFO Channel 01/0 : 51[c7000] -> 50[85000] via P2P/IPC
+gpub076:3343846:3343921 [3] NCCL INFO Connected all trees
+gpub076:3343846:3343921 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub076:3343846:3343921 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub076:3343846:3343921 [3] NCCL INFO comm 0x50888c10 rank 51 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub067:1390513:1390513 [0] NCCL INFO cudaDriverVersion 12010
+gpub067:1390513:1390513 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.167<0>
+gpub067:1390513:1390513 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub067:1390513:1390587 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.167<0>
+gpub067:1390513:1390587 [0] NCCL INFO Using network IB
+gpub067:1390513:1390587 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub067:1390513:1390587 [0] NCCL INFO Trees [0] 45/-1/-1->44->40 [1] 45/36/-1->44->29
+gpub067:1390513:1390587 [0] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0
+gpub067:1390513:1390587 [0] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [receive] via NET/IB/0
+gpub067:1390513:1390587 [0] NCCL INFO Channel 00/0 : 44[7000] -> 45[46000] via P2P/IPC
+gpub067:1390513:1390587 [0] NCCL INFO Channel 01/0 : 44[7000] -> 45[46000] via P2P/IPC
+gpub067:1390513:1390587 [0] NCCL INFO Connected all rings
+gpub067:1390513:1390587 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [receive] via NET/IB/0
+gpub067:1390513:1390587 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [receive] via NET/IB/0
+gpub067:1390513:1390587 [0] NCCL INFO Channel 01/0 : 29[46000] -> 44[7000] [receive] via NET/IB/0
+gpub067:1390513:1390587 [0] NCCL INFO Channel 01/0 : 44[7000] -> 29[46000] [send] via NET/IB/0
+gpub067:1390513:1390587 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [send] via NET/IB/0
+gpub067:1390513:1390587 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [send] via NET/IB/0
+gpub067:1390513:1390587 [0] NCCL INFO Connected all trees
+gpub067:1390513:1390587 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub067:1390513:1390587 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub067:1390513:1390587 [0] NCCL INFO comm 0x4ef73970 rank 44 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub015:828878:828878 [0] NCCL INFO cudaDriverVersion 12010
+gpub015:828878:828878 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.115<0>
+gpub015:828878:828878 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub015:828878:828950 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.115<0>
+gpub015:828878:828950 [0] NCCL INFO Using network IB
+gpub015:828878:828950 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub015:828878:828950 [0] NCCL INFO Trees [0] 9/12/-1->8->17 [1] 9/-1/-1->8->5
+gpub015:828878:828950 [0] NCCL INFO Channel 00/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0
+gpub015:828878:828950 [0] NCCL INFO Channel 01/0 : 7[c7000] -> 8[7000] [receive] via NET/IB/0
+gpub015:828878:828950 [0] NCCL INFO Channel 00/0 : 8[7000] -> 9[46000] via P2P/IPC
+gpub015:828878:828950 [0] NCCL INFO Channel 01/0 : 8[7000] -> 9[46000] via P2P/IPC
+gpub015:828878:828950 [0] NCCL INFO Connected all rings
+gpub015:828878:828950 [0] NCCL INFO Channel 01/0 : 5[46000] -> 8[7000] [receive] via NET/IB/0
+gpub015:828878:828950 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [send] via NET/IB/0
+gpub015:828878:828950 [0] NCCL INFO Channel 00/0 : 8[7000] -> 17[46000] [send] via NET/IB/0
+gpub015:828878:828950 [0] NCCL INFO Channel 00/0 : 17[46000] -> 8[7000] [receive] via NET/IB/0
+gpub015:828878:828950 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [receive] via NET/IB/0
+gpub015:828878:828950 [0] NCCL INFO Channel 01/0 : 8[7000] -> 5[46000] [send] via NET/IB/0
+gpub015:828878:828950 [0] NCCL INFO Connected all trees
+gpub015:828878:828950 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub015:828878:828950 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub015:828878:828950 [0] NCCL INFO comm 0x8fc63100 rank 8 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub015:828879:828879 [1] NCCL INFO cudaDriverVersion 12010
+gpub015:828879:828879 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.115<0>
+gpub015:828879:828879 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub015:828879:828952 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.115<0>
+gpub015:828879:828952 [1] NCCL INFO Using network IB
+gpub015:828879:828952 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub015:828879:828952 [1] NCCL INFO Trees [0] 10/4/-1->9->8 [1] 10/-1/-1->9->8
+gpub015:828879:828952 [1] NCCL INFO Channel 00/0 : 9[46000] -> 10[85000] via P2P/IPC
+gpub015:828879:828952 [1] NCCL INFO Channel 01/0 : 9[46000] -> 10[85000] via P2P/IPC
+gpub015:828879:828952 [1] NCCL INFO Connected all rings
+gpub015:828879:828952 [1] NCCL INFO Channel 00/0 : 4[7000] -> 9[46000] [receive] via NET/IB/0
+gpub015:828879:828952 [1] NCCL INFO Channel 00/0 : 9[46000] -> 4[7000] [send] via NET/IB/0
+gpub015:828879:828952 [1] NCCL INFO Channel 00/0 : 9[46000] -> 8[7000] via P2P/IPC
+gpub015:828879:828952 [1] NCCL INFO Channel 01/0 : 9[46000] -> 8[7000] via P2P/IPC
+gpub015:828879:828952 [1] NCCL INFO Connected all trees
+gpub015:828879:828952 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub015:828879:828952 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub015:828879:828952 [1] NCCL INFO comm 0x8ad4b90 rank 9 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub076:3343843:3343843 [0] NCCL INFO cudaDriverVersion 12010
+gpub076:3343843:3343843 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.176<0>
+gpub076:3343843:3343843 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub076:3343843:3343919 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.176<0>
+gpub076:3343843:3343919 [0] NCCL INFO Using network IB
+gpub076:3343843:3343919 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub076:3343843:3343919 [0] NCCL INFO Trees [0] 49/56/-1->48->32 [1] 49/-1/-1->48->52
+gpub076:3343843:3343919 [0] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0
+gpub076:3343843:3343919 [0] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [receive] via NET/IB/0
+gpub076:3343843:3343919 [0] NCCL INFO Channel 00/0 : 48[7000] -> 49[46000] via P2P/IPC
+gpub076:3343843:3343919 [0] NCCL INFO Channel 01/0 : 48[7000] -> 49[46000] via P2P/IPC
+gpub076:3343843:3343919 [0] NCCL INFO Connected all rings
+gpub076:3343843:3343919 [0] NCCL INFO Channel 01/0 : 48[7000] -> 52[7000] [send] via NET/IB/0
+gpub076:3343843:3343919 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [send] via NET/IB/0
+gpub076:3343843:3343919 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [receive] via NET/IB/0
+gpub076:3343843:3343919 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [send] via NET/IB/0
+gpub076:3343843:3343919 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [receive] via NET/IB/0
+gpub076:3343843:3343919 [0] NCCL INFO Channel 01/0 : 52[7000] -> 48[7000] [receive] via NET/IB/0
+gpub076:3343843:3343919 [0] NCCL INFO Connected all trees
+gpub076:3343843:3343919 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub076:3343843:3343919 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub076:3343843:3343919 [0] NCCL INFO comm 0x508de3f0 rank 48 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub076:3343844:3343844 [1] NCCL INFO cudaDriverVersion 12010
+gpub076:3343844:3343844 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.176<0>
+gpub076:3343844:3343844 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub076:3343844:3343920 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.176<0>
+gpub076:3343844:3343920 [1] NCCL INFO Using network IB
+gpub076:3343844:3343920 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub076:3343844:3343920 [1] NCCL INFO Trees [0] 50/40/-1->49->48 [1] 50/-1/-1->49->48
+gpub076:3343844:3343920 [1] NCCL INFO Channel 00/0 : 49[46000] -> 50[85000] via P2P/IPC
+gpub076:3343844:3343920 [1] NCCL INFO Channel 01/0 : 49[46000] -> 50[85000] via P2P/IPC
+gpub076:3343844:3343920 [1] NCCL INFO Connected all rings
+gpub076:3343844:3343920 [1] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [receive] via NET/IB/0
+gpub076:3343844:3343920 [1] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [send] via NET/IB/0
+gpub076:3343844:3343920 [1] NCCL INFO Channel 00/0 : 49[46000] -> 48[7000] via P2P/IPC
+gpub076:3343844:3343920 [1] NCCL INFO Channel 01/0 : 49[46000] -> 48[7000] via P2P/IPC
+gpub076:3343844:3343920 [1] NCCL INFO Connected all trees
+gpub076:3343844:3343920 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub076:3343844:3343920 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub076:3343844:3343920 [1] NCCL INFO comm 0xb838ee00 rank 49 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub015:828880:828880 [2] NCCL INFO cudaDriverVersion 12010
+gpub015:828880:828880 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.115<0>
+gpub015:828880:828880 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub015:828880:828951 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.115<0>
+gpub015:828880:828951 [2] NCCL INFO Using network IB
+gpub015:828880:828951 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub015:828880:828951 [2] NCCL INFO Trees [0] 11/-1/-1->10->9 [1] 11/-1/-1->10->9
+gpub015:828880:828951 [2] NCCL INFO Channel 00/0 : 10[85000] -> 11[c7000] via P2P/IPC
+gpub015:828880:828951 [2] NCCL INFO Channel 01/0 : 10[85000] -> 11[c7000] via P2P/IPC
+gpub015:828880:828951 [2] NCCL INFO Connected all rings
+gpub015:828880:828951 [2] NCCL INFO Channel 00/0 : 10[85000] -> 9[46000] via P2P/IPC
+gpub015:828880:828951 [2] NCCL INFO Channel 01/0 : 10[85000] -> 9[46000] via P2P/IPC
+gpub015:828880:828951 [2] NCCL INFO Connected all trees
+gpub015:828880:828951 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub015:828880:828951 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub015:828880:828951 [2] NCCL INFO comm 0x9e67ed0 rank 10 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub001:383775:383775 [1] NCCL INFO cudaDriverVersion 12010
+gpub001:383775:383775 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.101<0>
+gpub001:383775:383775 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub001:383775:383855 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.101<0>
+gpub001:383775:383855 [1] NCCL INFO Using network IB
+gpub001:383775:383855 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub001:383775:383855 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0
+gpub001:383775:383855 [1] NCCL INFO Channel 00/0 : 1[46000] -> 2[85000] via P2P/IPC
+gpub001:383775:383855 [1] NCCL INFO Channel 01/0 : 1[46000] -> 2[85000] via P2P/IPC
+gpub001:383775:383855 [1] NCCL INFO Connected all rings
+gpub001:383775:383855 [1] NCCL INFO Channel 00/0 : 1[46000] -> 0[7000] via P2P/IPC
+gpub001:383775:383855 [1] NCCL INFO Channel 01/0 : 1[46000] -> 0[7000] via P2P/IPC
+gpub001:383775:383855 [1] NCCL INFO Connected all trees
+gpub001:383775:383855 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub001:383775:383855 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub001:383775:383855 [1] NCCL INFO comm 0x8e376a10 rank 1 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub001:383776:383776 [2] NCCL INFO cudaDriverVersion 12010
+gpub001:383776:383776 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.101<0>
+gpub001:383776:383776 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub001:383776:383853 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.101<0>
+gpub001:383776:383853 [2] NCCL INFO Using network IB
+gpub001:383776:383853 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub001:383776:383853 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1
+gpub001:383776:383853 [2] NCCL INFO Channel 00/0 : 2[85000] -> 3[c7000] via P2P/IPC
+gpub001:383776:383853 [2] NCCL INFO Channel 01/0 : 2[85000] -> 3[c7000] via P2P/IPC
+gpub001:383776:383853 [2] NCCL INFO Connected all rings
+gpub001:383776:383853 [2] NCCL INFO Channel 00/0 : 2[85000] -> 1[46000] via P2P/IPC
+gpub001:383776:383853 [2] NCCL INFO Channel 01/0 : 2[85000] -> 1[46000] via P2P/IPC
+gpub001:383776:383853 [2] NCCL INFO Connected all trees
+gpub079:2616803:2616803 [0] NCCL INFO cudaDriverVersion 12010
+gpub079:2616803:2616803 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.179<0>
+gpub079:2616803:2616803 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub079:2616803:2616883 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.179<0>
+gpub079:2616803:2616883 [0] NCCL INFO Using network IB
+gpub079:2616803:2616883 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub079:2616803:2616883 [0] NCCL INFO Trees [0] 57/60/-1->56->48 [1] 57/-1/-1->56->53
+gpub079:2616803:2616883 [0] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0
+gpub079:2616803:2616883 [0] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [receive] via NET/IB/0
+gpub079:2616803:2616883 [0] NCCL INFO Channel 00/0 : 56[7000] -> 57[46000] via P2P/IPC
+gpub079:2616803:2616883 [0] NCCL INFO Channel 01/0 : 56[7000] -> 57[46000] via P2P/IPC
+gpub079:2616803:2616883 [0] NCCL INFO Connected all rings
+gpub001:383776:383853 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub001:383776:383853 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub001:383776:383853 [2] NCCL INFO comm 0xa0c5f40 rank 2 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub079:2616803:2616883 [0] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [receive] via NET/IB/0
+gpub079:2616803:2616883 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [send] via NET/IB/0
+gpub079:2616803:2616883 [0] NCCL INFO Channel 00/0 : 48[7000] -> 56[7000] [receive] via NET/IB/0
+gpub079:2616803:2616883 [0] NCCL INFO Channel 00/0 : 56[7000] -> 48[7000] [send] via NET/IB/0
+gpub079:2616803:2616883 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [receive] via NET/IB/0
+gpub079:2616803:2616883 [0] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [send] via NET/IB/0
+gpub079:2616803:2616883 [0] NCCL INFO Connected all trees
+gpub079:2616803:2616883 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub079:2616803:2616883 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub079:2616803:2616883 [0] NCCL INFO comm 0xa9779a50 rank 56 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub067:1390515:1390515 [2] NCCL INFO cudaDriverVersion 12010
+gpub067:1390515:1390515 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.167<0>
+gpub067:1390515:1390515 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub067:1390515:1390586 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.167<0>
+gpub067:1390515:1390586 [2] NCCL INFO Using network IB
+gpub067:1390515:1390586 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub067:1390515:1390586 [2] NCCL INFO Trees [0] 47/-1/-1->46->45 [1] 47/-1/-1->46->45
+gpub067:1390515:1390586 [2] NCCL INFO Channel 00/0 : 46[85000] -> 47[c7000] via P2P/IPC
+gpub067:1390515:1390586 [2] NCCL INFO Channel 01/0 : 46[85000] -> 47[c7000] via P2P/IPC
+gpub067:1390515:1390586 [2] NCCL INFO Connected all rings
+gpub067:1390515:1390586 [2] NCCL INFO Channel 00/0 : 46[85000] -> 45[46000] via P2P/IPC
+gpub067:1390515:1390586 [2] NCCL INFO Channel 01/0 : 46[85000] -> 45[46000] via P2P/IPC
+gpub067:1390515:1390586 [2] NCCL INFO Connected all trees
+gpub067:1390515:1390586 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub067:1390515:1390586 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub067:1390515:1390586 [2] NCCL INFO comm 0x5030f0d0 rank 46 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
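Reading the prefixes: each line is host:pid:tid followed by the local CUDA device in brackets, and with 4 GPUs per node the global ranks come in consecutive blocks of four per host (gpub001 holds ranks 0-3, gpub002 ranks 4-7, and so on, matching the "comm ... rank N" lines). A small sketch of that mapping; the node ordering in nodelist is an assumption inferred from the ranks observed above, not read from the log:

    # Map a log prefix like "gpub002:... [1]" back to a global rank, assuming
    # consecutive rank blocks per node (illustrative helper, not ESPnet code).
    nodelist = ["gpub001", "gpub002"]  # first hosts of the allocation (assumed order)
    def global_rank(host, local_gpu, gpus_per_node=4):
        return gpus_per_node * nodelist.index(host) + local_gpu

    print(global_rank("gpub002", 1))  # 5, matching "comm ... rank 5 ... cudaDev 1"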
+gpub067:1390514:1390514 [1] NCCL INFO cudaDriverVersion 12010
+gpub067:1390514:1390514 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.167<0>
+gpub067:1390514:1390514 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub067:1390514:1390588 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.167<0>
+gpub067:1390514:1390588 [1] NCCL INFO Using network IB
+gpub067:1390514:1390588 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub067:1390514:1390588 [1] NCCL INFO Trees [0] 46/-1/-1->45->44 [1] 46/52/-1->45->44
+gpub067:1390514:1390588 [1] NCCL INFO Channel 00/0 : 45[46000] -> 46[85000] via P2P/IPC
+gpub067:1390514:1390588 [1] NCCL INFO Channel 01/0 : 45[46000] -> 46[85000] via P2P/IPC
+gpub067:1390514:1390588 [1] NCCL INFO Connected all rings
+gpub067:1390514:1390588 [1] NCCL INFO Channel 01/0 : 45[46000] -> 52[7000] [send] via NET/IB/0
+gpub067:1390514:1390588 [1] NCCL INFO Channel 01/0 : 52[7000] -> 45[46000] [receive] via NET/IB/0
+gpub077:252893:252893 [1] NCCL INFO cudaDriverVersion 12010
+gpub077:252893:252893 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.177<0>
+gpub077:252893:252893 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub077:252893:252961 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.177<0>
+gpub077:252893:252961 [1] NCCL INFO Using network IB
+gpub077:252893:252961 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub077:252893:252961 [1] NCCL INFO Trees [0] 54/-1/-1->53->52 [1] 54/56/-1->53->52
+gpub077:252893:252961 [1] NCCL INFO Channel 00/0 : 53[46000] -> 54[85000] via P2P/IPC
+gpub077:252893:252961 [1] NCCL INFO Channel 01/0 : 53[46000] -> 54[85000] via P2P/IPC
+gpub077:252893:252961 [1] NCCL INFO Connected all rings
+gpub077:252893:252961 [1] NCCL INFO Channel 01/0 : 53[46000] -> 56[7000] [send] via NET/IB/0
+gpub077:252893:252961 [1] NCCL INFO Channel 01/0 : 56[7000] -> 53[46000] [receive] via NET/IB/0
+gpub067:1390514:1390588 [1] NCCL INFO Channel 00/0 : 45[46000] -> 44[7000] via P2P/IPC
+gpub067:1390514:1390588 [1] NCCL INFO Channel 01/0 : 45[46000] -> 44[7000] via P2P/IPC
+gpub067:1390514:1390588 [1] NCCL INFO Connected all trees
+gpub067:1390514:1390588 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub067:1390514:1390588 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub067:1390514:1390588 [1] NCCL INFO comm 0xa70b75d0 rank 45 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub077:252893:252961 [1] NCCL INFO Channel 00/0 : 53[46000] -> 52[7000] via P2P/IPC
+gpub077:252893:252961 [1] NCCL INFO Channel 01/0 : 53[46000] -> 52[7000] via P2P/IPC
+gpub077:252893:252961 [1] NCCL INFO Connected all trees
+gpub077:252893:252961 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub077:252893:252961 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub077:252893:252961 [1] NCCL INFO comm 0x509e6280 rank 53 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub016:1380823:1380823 [2] NCCL INFO cudaDriverVersion 12010
+gpub016:1380823:1380823 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.116<0>
+gpub016:1380823:1380823 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub016:1380823:1380896 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.116<0>
+gpub016:1380823:1380896 [2] NCCL INFO Using network IB
+gpub016:1380823:1380896 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub016:1380823:1380896 [2] NCCL INFO Trees [0] 15/-1/-1->14->13 [1] 15/-1/-1->14->13
+gpub016:1380823:1380896 [2] NCCL INFO Channel 00/0 : 14[85000] -> 15[c7000] via P2P/IPC
+gpub016:1380823:1380896 [2] NCCL INFO Channel 01/0 : 14[85000] -> 15[c7000] via P2P/IPC
+gpub016:1380823:1380896 [2] NCCL INFO Connected all rings
+gpub016:1380823:1380896 [2] NCCL INFO Channel 00/0 : 14[85000] -> 13[46000] via P2P/IPC
+gpub016:1380823:1380896 [2] NCCL INFO Channel 01/0 : 14[85000] -> 13[46000] via P2P/IPC
+gpub016:1380823:1380896 [2] NCCL INFO Connected all trees
+gpub016:1380823:1380896 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub016:1380823:1380896 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub016:1380823:1380896 [2] NCCL INFO comm 0x517fee10 rank 14 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub032:3246894:3246894 [2] NCCL INFO cudaDriverVersion 12010
+gpub032:3246894:3246894 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.132<0>
+gpub032:3246894:3246894 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub032:3246894:3246976 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.132<0>
+gpub032:3246894:3246976 [2] NCCL INFO Using network IB
+gpub032:3246894:3246976 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub032:3246894:3246976 [2] NCCL INFO Trees [0] 31/-1/-1->30->29 [1] 31/-1/-1->30->29
+gpub032:3246894:3246976 [2] NCCL INFO Channel 00/0 : 30[85000] -> 31[c7000] via P2P/IPC
+gpub032:3246894:3246976 [2] NCCL INFO Channel 01/0 : 30[85000] -> 31[c7000] via P2P/IPC
+gpub032:3246894:3246976 [2] NCCL INFO Connected all rings
+gpub032:3246894:3246976 [2] NCCL INFO Channel 00/0 : 30[85000] -> 29[46000] via P2P/IPC
+gpub032:3246894:3246976 [2] NCCL INFO Channel 01/0 : 30[85000] -> 29[46000] via P2P/IPC
+gpub032:3246894:3246976 [2] NCCL INFO Connected all trees
+gpub032:3246894:3246976 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub032:3246894:3246976 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub032:3246894:3246976 [2] NCCL INFO comm 0x9ddee7e0 rank 30 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub001:383777:383777 [3] NCCL INFO cudaDriverVersion 12010
+gpub001:383777:383777 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.101<0>
+gpub001:383777:383777 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub001:383777:383854 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.101<0>
+gpub001:383777:383854 [3] NCCL INFO Using network IB
+gpub001:383777:383854 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub001:383777:383854 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2
+gpub001:383777:383854 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0
+gpub001:383777:383854 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 4[7000] [send] via NET/IB/0
+gpub001:383777:383854 [3] NCCL INFO Connected all rings
+gpub001:383777:383854 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 2[85000] via P2P/IPC
+gpub001:383777:383854 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 2[85000] via P2P/IPC
+gpub001:383777:383854 [3] NCCL INFO Connected all trees
+gpub001:383777:383854 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub001:383777:383854 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub001:383777:383854 [3] NCCL INFO comm 0xc243d9d0 rank 3 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE
+gpub030:2310659:2310659 [2] NCCL INFO cudaDriverVersion 12010
+gpub030:2310659:2310659 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.130<0>
+gpub030:2310659:2310659 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub030:2310659:2310728 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.130<0>
+gpub030:2310659:2310728 [2] NCCL INFO Using network IB
+gpub030:2310659:2310728 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub030:2310659:2310728 [2] NCCL INFO Trees [0] 23/-1/-1->22->21 [1] 23/-1/-1->22->21
+gpub030:2310659:2310728 [2] NCCL INFO Channel 00/0 : 22[85000] -> 23[c7000] via P2P/IPC
+gpub030:2310659:2310728 [2] NCCL INFO Channel 01/0 : 22[85000] -> 23[c7000] via P2P/IPC
+gpub030:2310659:2310728 [2] NCCL INFO Connected all rings
+gpub030:2310659:2310728 [2] NCCL INFO Channel 00/0 : 22[85000] -> 21[46000] via P2P/IPC
+gpub030:2310659:2310728 [2] NCCL INFO Channel 01/0 : 22[85000] -> 21[46000] via P2P/IPC
+gpub030:2310659:2310728 [2] NCCL INFO Connected all trees
+gpub030:2310659:2310728 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub030:2310659:2310728 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub030:2310659:2310728 [2] NCCL INFO comm 0x8de12f60 rank 22 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE
+gpub030:2310657:2310657 [0] NCCL INFO cudaDriverVersion 12010
+gpub030:2310657:2310657 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.130<0>
+gpub030:2310657:2310657 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub030:2310657:2310726 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.130<0>
+gpub030:2310657:2310726 [0] NCCL INFO Using network IB
+gpub030:2310657:2310726 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub030:2310657:2310726 [0] NCCL INFO Trees [0] 21/-1/-1->20->25 [1] 21/16/-1->20->13
+gpub030:2310657:2310726 [0] NCCL INFO Channel 00/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0
+gpub030:2310657:2310726 [0] NCCL INFO Channel 01/0 : 19[c7000] -> 20[7000] [receive] via NET/IB/0
+gpub030:2310657:2310726 [0] NCCL INFO Channel 00/0 : 20[7000] -> 21[46000] via P2P/IPC
+gpub030:2310657:2310726 [0] NCCL INFO Channel 01/0 : 20[7000] -> 21[46000] via P2P/IPC
+gpub030:2310657:2310726 [0] NCCL INFO Connected all rings
+gpub060:1938143:1938143 [0] NCCL INFO cudaDriverVersion 12010
+gpub060:1938143:1938143 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.160<0>
+gpub060:1938143:1938143 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub060:1938143:1938219 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.160<0>
+gpub060:1938143:1938219 [0] NCCL INFO Using network IB
+gpub060:1938143:1938219 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub060:1938143:1938219 [0] NCCL INFO Trees [0] 37/-1/-1->36->41 [1] 37/32/-1->36->44
+gpub060:1938143:1938219 [0] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0
+gpub060:1938143:1938219 [0] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [receive] via NET/IB/0
+gpub060:1938143:1938219 [0] NCCL INFO Channel 00/0 : 36[7000] -> 37[46000] via P2P/IPC
+gpub060:1938143:1938219 [0] NCCL INFO Channel 01/0 : 36[7000] -> 37[46000] via P2P/IPC
+gpub060:1938143:1938219 [0] NCCL INFO Connected all rings
+gpub030:2310657:2310726 [0] NCCL INFO Channel 01/0 : 16[7000] -> 20[7000] [receive] via NET/IB/0
+gpub030:2310657:2310726 [0] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [send] via NET/IB/0
+gpub030:2310657:2310726 [0] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [receive] via NET/IB/0
+gpub030:2310657:2310726 [0] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [send] via NET/IB/0
+gpub030:2310657:2310726 [0] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [receive] via NET/IB/0
+gpub030:2310657:2310726 [0] NCCL INFO Channel 01/0 : 20[7000] -> 16[7000] [send] via NET/IB/0
+gpub030:2310657:2310726 [0] NCCL INFO Connected all trees
+gpub030:2310657:2310726 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub030:2310657:2310726 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub030:2310657:2310726 [0] NCCL INFO comm 0x50d929d0 rank 20 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub060:1938143:1938219 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [receive] via NET/IB/0
+gpub060:1938143:1938219 [0] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [send] via NET/IB/0
+gpub060:1938143:1938219 [0] NCCL INFO Channel 01/0 : 36[7000] -> 44[7000] [send] via NET/IB/0
+gpub060:1938143:1938219 [0] NCCL INFO Channel 01/0 : 44[7000] -> 36[7000] [receive] via NET/IB/0
+gpub060:1938143:1938219 [0] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [receive] via NET/IB/0
+gpub060:1938143:1938219 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [send] via NET/IB/0
+gpub060:1938143:1938219 [0] NCCL INFO Connected all trees
+gpub060:1938143:1938219 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub060:1938143:1938219 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub060:1938143:1938219 [0] NCCL INFO comm 0x50561020 rank 36 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE
+gpub060:1938144:1938144 [1] NCCL INFO cudaDriverVersion 12010
+gpub060:1938144:1938144 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.160<0>
+gpub060:1938144:1938144 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub060:1938144:1938217 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.160<0>
+gpub060:1938144:1938217 [1] NCCL INFO Using network IB
+gpub060:1938144:1938217 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub060:1938144:1938217 [1] NCCL INFO Trees [0] 38/-1/-1->37->36 [1] 38/40/-1->37->36
+gpub060:1938144:1938217 [1] NCCL INFO Channel 00/0 : 37[46000] -> 38[85000] via P2P/IPC
+gpub060:1938144:1938217 [1] NCCL INFO Channel 01/0 : 37[46000] -> 38[85000] via P2P/IPC
+gpub060:1938144:1938217 [1] NCCL INFO Connected all rings
+gpub060:1938144:1938217 [1] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [send] via NET/IB/0
+gpub060:1938144:1938217 [1] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [receive] via NET/IB/0
+gpub060:1938144:1938217 [1] NCCL INFO Channel 00/0 : 37[46000] -> 36[7000] via P2P/IPC
+gpub060:1938144:1938217 [1] NCCL INFO Channel 01/0 : 37[46000] -> 36[7000] via P2P/IPC
+gpub060:1938144:1938217 [1] NCCL INFO Connected all trees
+gpub060:1938144:1938217 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub060:1938144:1938217 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub060:1938144:1938217 [1] NCCL INFO comm 0x4f3bc650 rank 37 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+gpub079:2616806:2616806 [3] NCCL INFO cudaDriverVersion 12010
+gpub079:2616806:2616806 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.179<0>
+gpub079:2616806:2616806 [3] NCCL INFO NET/Plugin : No plugin found
(libnccl-net.so), using internal implementation +gpub079:2616806:2616881 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.179<0> +gpub079:2616806:2616881 [3] NCCL INFO Using network IB +gpub079:2616806:2616881 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub079:2616806:2616881 [3] NCCL INFO Trees [0] -1/-1/-1->59->58 [1] -1/-1/-1->59->58 +gpub079:2616806:2616881 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0 +gpub079:2616806:2616881 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [send] via NET/IB/0 +gpub079:2616806:2616881 [3] NCCL INFO Connected all rings +gpub079:2616806:2616881 [3] NCCL INFO Channel 00/0 : 59[c7000] -> 58[85000] via P2P/IPC +gpub079:2616806:2616881 [3] NCCL INFO Channel 01/0 : 59[c7000] -> 58[85000] via P2P/IPC +gpub079:2616806:2616881 [3] NCCL INFO Connected all trees +gpub079:2616806:2616881 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub079:2616806:2616881 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub079:2616806:2616881 [3] NCCL INFO comm 0x89762f0 rank 59 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub066:1432046:1432046 [1] NCCL INFO cudaDriverVersion 12010 +gpub066:1432046:1432046 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.166<0> +gpub066:1432046:1432046 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub066:1432046:1432129 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.166<0> +gpub066:1432046:1432129 [1] NCCL INFO Using network IB +gpub066:1432046:1432129 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub066:1432046:1432129 [1] NCCL INFO Trees [0] 42/36/-1->41->40 [1] 42/-1/-1->41->40 +gpub066:1432046:1432129 [1] NCCL INFO Channel 00/0 : 41[46000] -> 42[85000] via P2P/IPC +gpub066:1432046:1432129 [1] NCCL INFO Channel 01/0 : 41[46000] -> 42[85000] via P2P/IPC +gpub066:1432046:1432129 [1] NCCL INFO Connected all rings +gpub066:1432046:1432129 [1] NCCL INFO Channel 00/0 : 36[7000] -> 41[46000] [receive] via NET/IB/0 +gpub066:1432046:1432129 [1] NCCL INFO Channel 00/0 : 41[46000] -> 36[7000] [send] via NET/IB/0 +gpub066:1432046:1432129 [1] NCCL INFO Channel 00/0 : 41[46000] -> 40[7000] via P2P/IPC +gpub066:1432046:1432129 [1] NCCL INFO Channel 01/0 : 41[46000] -> 40[7000] via P2P/IPC +gpub066:1432046:1432129 [1] NCCL INFO Connected all trees +gpub066:1432046:1432129 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub066:1432046:1432129 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub066:1432046:1432129 [1] NCCL INFO comm 0x4fabed20 rank 41 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub066:1432045:1432045 [0] NCCL INFO cudaDriverVersion 12010 +gpub066:1432045:1432045 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.166<0> +gpub066:1432045:1432045 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub066:1432045:1432128 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.166<0> +gpub066:1432045:1432128 [0] NCCL INFO Using network IB +gpub066:1432045:1432128 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub066:1432045:1432128 [0] NCCL INFO Trees [0] 41/44/-1->40->49 [1] 41/-1/-1->40->37 +gpub066:1432045:1432128 [0] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0 +gpub066:1432045:1432128 [0] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [receive] via NET/IB/0 +gpub066:1432045:1432128 [0] NCCL INFO Channel 00/0 : 40[7000] -> 
41[46000] via P2P/IPC +gpub066:1432045:1432128 [0] NCCL INFO Channel 01/0 : 40[7000] -> 41[46000] via P2P/IPC +gpub066:1432045:1432128 [0] NCCL INFO Connected all rings +gpub066:1432045:1432128 [0] NCCL INFO Channel 01/0 : 37[46000] -> 40[7000] [receive] via NET/IB/0 +gpub066:1432045:1432128 [0] NCCL INFO Channel 00/0 : 40[7000] -> 44[7000] [send] via NET/IB/0 +gpub066:1432045:1432128 [0] NCCL INFO Channel 00/0 : 40[7000] -> 49[46000] [send] via NET/IB/0 +gpub066:1432045:1432128 [0] NCCL INFO Channel 00/0 : 49[46000] -> 40[7000] [receive] via NET/IB/0 +gpub066:1432045:1432128 [0] NCCL INFO Channel 00/0 : 44[7000] -> 40[7000] [receive] via NET/IB/0 +gpub066:1432045:1432128 [0] NCCL INFO Channel 01/0 : 40[7000] -> 37[46000] [send] via NET/IB/0 +gpub066:1432045:1432128 [0] NCCL INFO Connected all trees +gpub066:1432045:1432128 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub066:1432045:1432128 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub066:1432045:1432128 [0] NCCL INFO comm 0x50653520 rank 40 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub016:1380824:1380824 [3] NCCL INFO cudaDriverVersion 12010 +gpub016:1380824:1380824 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.116<0> +gpub016:1380824:1380824 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub016:1380824:1380897 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.116<0> +gpub016:1380824:1380897 [3] NCCL INFO Using network IB +gpub016:1380824:1380897 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub016:1380824:1380897 [3] NCCL INFO Trees [0] -1/-1/-1->15->14 [1] -1/-1/-1->15->14 +gpub016:1380824:1380897 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0 +gpub016:1380824:1380897 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 16[7000] [send] via NET/IB/0 +gpub016:1380824:1380897 [3] NCCL INFO Connected all rings +gpub016:1380824:1380897 [3] NCCL INFO Channel 00/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub016:1380824:1380897 [3] NCCL INFO Channel 01/0 : 15[c7000] -> 14[85000] via P2P/IPC +gpub016:1380824:1380897 [3] NCCL INFO Connected all trees +gpub016:1380824:1380897 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub016:1380824:1380897 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub016:1380824:1380897 [3] NCCL INFO comm 0x8d241cc0 rank 15 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub031:1878314:1878314 [3] NCCL INFO cudaDriverVersion 12010 +gpub031:1878314:1878314 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.131<0> +gpub031:1878314:1878314 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub031:1878314:1878391 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.131<0> +gpub031:1878314:1878391 [3] NCCL INFO Using network IB +gpub031:1878314:1878391 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub031:1878314:1878391 [3] NCCL INFO Trees [0] -1/-1/-1->27->26 [1] -1/-1/-1->27->26 +gpub031:1878314:1878391 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0 +gpub031:1878314:1878391 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [send] via NET/IB/0 +gpub031:1878314:1878391 [3] NCCL INFO Connected all rings +gpub031:1878314:1878391 [3] NCCL INFO Channel 00/0 : 27[c7000] -> 26[85000] via P2P/IPC +gpub031:1878314:1878391 [3] NCCL INFO Channel 01/0 : 27[c7000] -> 26[85000] via P2P/IPC +gpub066:1432048:1432048 [3] NCCL INFO cudaDriverVersion 12010 
+gpub066:1432048:1432048 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.166<0> +gpub066:1432048:1432048 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub066:1432048:1432126 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.166<0> +gpub066:1432048:1432126 [3] NCCL INFO Using network IB +gpub066:1432048:1432126 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub066:1432048:1432126 [3] NCCL INFO Trees [0] -1/-1/-1->43->42 [1] -1/-1/-1->43->42 +gpub066:1432048:1432126 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0 +gpub066:1432048:1432126 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 44[7000] [send] via NET/IB/0 +gpub066:1432048:1432126 [3] NCCL INFO Connected all rings +gpub066:1432048:1432126 [3] NCCL INFO Channel 00/0 : 43[c7000] -> 42[85000] via P2P/IPC +gpub066:1432048:1432126 [3] NCCL INFO Channel 01/0 : 43[c7000] -> 42[85000] via P2P/IPC +gpub031:1878314:1878391 [3] NCCL INFO Connected all trees +gpub031:1878314:1878391 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub031:1878314:1878391 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub031:1878314:1878391 [3] NCCL INFO comm 0x511daaa0 rank 27 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub066:1432048:1432126 [3] NCCL INFO Connected all trees +gpub066:1432048:1432126 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub066:1432048:1432126 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub066:1432048:1432126 [3] NCCL INFO comm 0x51126a70 rank 43 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub067:1390516:1390516 [3] NCCL INFO cudaDriverVersion 12010 +gpub067:1390516:1390516 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.167<0> +gpub067:1390516:1390516 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub067:1390516:1390585 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.167<0> +gpub067:1390516:1390585 [3] NCCL INFO Using network IB +gpub067:1390516:1390585 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub067:1390516:1390585 [3] NCCL INFO Trees [0] -1/-1/-1->47->46 [1] -1/-1/-1->47->46 +gpub067:1390516:1390585 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0 +gpub067:1390516:1390585 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 48[7000] [send] via NET/IB/0 +gpub067:1390516:1390585 [3] NCCL INFO Connected all rings +gpub067:1390516:1390585 [3] NCCL INFO Channel 00/0 : 47[c7000] -> 46[85000] via P2P/IPC +gpub067:1390516:1390585 [3] NCCL INFO Channel 01/0 : 47[c7000] -> 46[85000] via P2P/IPC +gpub067:1390516:1390585 [3] NCCL INFO Connected all trees +gpub067:1390516:1390585 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub067:1390516:1390585 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub067:1390516:1390585 [3] NCCL INFO comm 0x509fc1c0 rank 47 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub031:1878313:1878313 [2] NCCL INFO cudaDriverVersion 12010 +gpub031:1878313:1878313 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.131<0> +gpub031:1878313:1878313 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub031:1878313:1878389 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.131<0> +gpub031:1878313:1878389 [2] NCCL INFO Using network IB +gpub031:1878313:1878389 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub031:1878313:1878389 [2] NCCL INFO Trees [0] 
27/-1/-1->26->25 [1] 27/-1/-1->26->25 +gpub031:1878313:1878389 [2] NCCL INFO Channel 00/0 : 26[85000] -> 27[c7000] via P2P/IPC +gpub031:1878313:1878389 [2] NCCL INFO Channel 01/0 : 26[85000] -> 27[c7000] via P2P/IPC +gpub031:1878313:1878389 [2] NCCL INFO Connected all rings +gpub031:1878313:1878389 [2] NCCL INFO Channel 00/0 : 26[85000] -> 25[46000] via P2P/IPC +gpub031:1878313:1878389 [2] NCCL INFO Channel 01/0 : 26[85000] -> 25[46000] via P2P/IPC +gpub031:1878313:1878389 [2] NCCL INFO Connected all trees +gpub031:1878313:1878389 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub031:1878313:1878389 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub031:1878313:1878389 [2] NCCL INFO comm 0xa54f400 rank 26 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub001:383774:383852 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.101<0> +gpub001:383774:383852 [0] NCCL INFO Using network IB +gpub001:383774:383852 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub001:383774:383852 [0] NCCL INFO Channel 00/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpub001:383774:383852 [0] NCCL INFO Channel 01/02 : 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 +gpub001:383774:383852 [0] NCCL INFO Trees [0] 1/32/-1->0->-1 [1] 1/-1/-1->0->4 +gpub001:383774:383852 [0] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0 +gpub001:383774:383852 [0] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [receive] via NET/IB/0 +gpub001:383774:383852 [0] NCCL INFO Channel 00/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub001:383774:383852 [0] NCCL INFO Channel 01/0 : 0[7000] -> 1[46000] via P2P/IPC +gpub001:383774:383852 [0] NCCL INFO Connected all rings +gpub001:383774:383852 [0] NCCL INFO Channel 01/0 : 0[7000] -> 4[7000] [send] via NET/IB/0 +gpub001:383774:383852 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [receive] via NET/IB/0 +gpub001:383774:383852 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [send] via NET/IB/0 +gpub001:383774:383852 [0] NCCL INFO Channel 01/0 : 4[7000] -> 0[7000] [receive] via NET/IB/0 +gpub001:383774:383852 [0] NCCL INFO Connected all trees +gpub001:383774:383852 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub001:383774:383852 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub001:383774:383852 [0] NCCL INFO comm 0x9b744f70 rank 0 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub059:1894384:1894384 [1] NCCL INFO cudaDriverVersion 12010 +gpub059:1894384:1894384 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.159<0> +gpub059:1894384:1894384 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub059:1894384:1894459 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.159<0> +gpub059:1894384:1894459 [1] NCCL INFO Using network IB +gpub059:1894384:1894459 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub059:1894384:1894459 [1] NCCL INFO Trees [0] 34/16/-1->33->32 [1] 34/-1/-1->33->32 +gpub059:1894384:1894459 [1] NCCL INFO Channel 00/0 : 33[46000] -> 34[85000] via P2P/IPC +gpub059:1894384:1894459 [1] NCCL INFO Channel 01/0 : 33[46000] -> 34[85000] via P2P/IPC +gpub059:1894384:1894459 [1] NCCL INFO Connected all rings +gpub059:1894384:1894459 [1] NCCL INFO Channel 00/0 : 16[7000] -> 33[46000] [receive] via NET/IB/0 +gpub059:1894384:1894459 [1] NCCL INFO Channel 00/0 : 33[46000] -> 16[7000] [send] via NET/IB/0 +gpub059:1894384:1894459 [1] NCCL INFO Channel 00/0 : 33[46000] -> 
32[7000] via P2P/IPC +gpub059:1894384:1894459 [1] NCCL INFO Channel 01/0 : 33[46000] -> 32[7000] via P2P/IPC +gpub059:1894384:1894459 [1] NCCL INFO Connected all trees +gpub059:1894384:1894459 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub059:1894384:1894459 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub059:1894384:1894459 [1] NCCL INFO comm 0xb7b49460 rank 33 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub016:1380821:1380821 [0] NCCL INFO cudaDriverVersion 12010 +gpub016:1380821:1380821 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.116<0> +gpub016:1380821:1380821 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub016:1380821:1380899 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.116<0> +gpub016:1380821:1380899 [0] NCCL INFO Using network IB +gpub016:1380821:1380899 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub016:1380821:1380899 [0] NCCL INFO Trees [0] 13/-1/-1->12->8 [1] 13/4/-1->12->28 +gpub016:1380821:1380899 [0] NCCL INFO Channel 00/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0 +gpub016:1380821:1380899 [0] NCCL INFO Channel 01/0 : 11[c7000] -> 12[7000] [receive] via NET/IB/0 +gpub016:1380821:1380899 [0] NCCL INFO Channel 00/0 : 12[7000] -> 13[46000] via P2P/IPC +gpub016:1380821:1380899 [0] NCCL INFO Channel 01/0 : 12[7000] -> 13[46000] via P2P/IPC +gpub016:1380821:1380899 [0] NCCL INFO Connected all rings +gpub016:1380821:1380899 [0] NCCL INFO Channel 00/0 : 8[7000] -> 12[7000] [receive] via NET/IB/0 +gpub016:1380821:1380899 [0] NCCL INFO Channel 01/0 : 4[7000] -> 12[7000] [receive] via NET/IB/0 +gpub016:1380821:1380899 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [send] via NET/IB/0 +gpub016:1380821:1380899 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [receive] via NET/IB/0 +gpub016:1380821:1380899 [0] NCCL INFO Channel 01/0 : 12[7000] -> 4[7000] [send] via NET/IB/0 +gpub016:1380821:1380899 [0] NCCL INFO Channel 00/0 : 12[7000] -> 8[7000] [send] via NET/IB/0 +gpub016:1380821:1380899 [0] NCCL INFO Connected all trees +gpub016:1380821:1380899 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub016:1380821:1380899 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub016:1380821:1380899 [0] NCCL INFO comm 0x50896990 rank 12 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub077:252894:252894 [2] NCCL INFO cudaDriverVersion 12010 +gpub077:252894:252894 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.177<0> +gpub077:252894:252894 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub077:252894:252964 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.177<0> +gpub077:252894:252964 [2] NCCL INFO Using network IB +gpub077:252894:252964 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub077:252894:252964 [2] NCCL INFO Trees [0] 55/-1/-1->54->53 [1] 55/-1/-1->54->53 +gpub077:252894:252964 [2] NCCL INFO Channel 00/0 : 54[85000] -> 55[c7000] via P2P/IPC +gpub077:252894:252964 [2] NCCL INFO Channel 01/0 : 54[85000] -> 55[c7000] via P2P/IPC +gpub077:252894:252964 [2] NCCL INFO Connected all rings +gpub077:252894:252964 [2] NCCL INFO Channel 00/0 : 54[85000] -> 53[46000] via P2P/IPC +gpub077:252894:252964 [2] NCCL INFO Channel 01/0 : 54[85000] -> 53[46000] via P2P/IPC +gpub077:252894:252964 [2] NCCL INFO Connected all trees +gpub077:252894:252964 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub077:252894:252964 [2] NCCL 
INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub077:252894:252964 [2] NCCL INFO comm 0xc19a4b40 rank 54 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub032:3246892:3246892 [0] NCCL INFO cudaDriverVersion 12010 +gpub032:3246892:3246892 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.132<0> +gpub032:3246892:3246892 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub032:3246892:3246974 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.132<0> +gpub032:3246892:3246974 [0] NCCL INFO Using network IB +gpub032:3246892:3246974 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub032:3246892:3246974 [0] NCCL INFO Trees [0] 29/-1/-1->28->24 [1] 29/12/-1->28->60 +gpub032:3246892:3246974 [0] NCCL INFO Channel 00/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0 +gpub032:3246892:3246974 [0] NCCL INFO Channel 01/0 : 27[c7000] -> 28[7000] [receive] via NET/IB/0 +gpub032:3246892:3246974 [0] NCCL INFO Channel 00/0 : 28[7000] -> 29[46000] via P2P/IPC +gpub032:3246892:3246974 [0] NCCL INFO Channel 01/0 : 28[7000] -> 29[46000] via P2P/IPC +gpub032:3246892:3246974 [0] NCCL INFO Connected all rings +gpub032:3246892:3246974 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [receive] via NET/IB/0 +gpub032:3246892:3246974 [0] NCCL INFO Channel 01/0 : 12[7000] -> 28[7000] [receive] via NET/IB/0 +gpub032:3246892:3246974 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [receive] via NET/IB/0 +gpub032:3246892:3246974 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [send] via NET/IB/0 +gpub032:3246892:3246974 [0] NCCL INFO Channel 01/0 : 28[7000] -> 12[7000] [send] via NET/IB/0 +gpub032:3246892:3246974 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [send] via NET/IB/0 +gpub032:3246892:3246974 [0] NCCL INFO Connected all trees +gpub032:3246892:3246974 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub032:3246892:3246974 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub032:3246892:3246974 [0] NCCL INFO comm 0x4ff3dba0 rank 28 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub079:2616804:2616804 [1] NCCL INFO cudaDriverVersion 12010 +gpub079:2616804:2616804 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.179<0> +gpub079:2616804:2616804 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub079:2616804:2616880 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.179<0> +gpub079:2616804:2616880 [1] NCCL INFO Using network IB +gpub079:2616804:2616880 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub079:2616804:2616880 [1] NCCL INFO Trees [0] 58/52/-1->57->56 [1] 58/-1/-1->57->56 +gpub079:2616804:2616880 [1] NCCL INFO Channel 00/0 : 57[46000] -> 58[85000] via P2P/IPC +gpub079:2616804:2616880 [1] NCCL INFO Channel 01/0 : 57[46000] -> 58[85000] via P2P/IPC +gpub079:2616804:2616880 [1] NCCL INFO Connected all rings +gpub079:2616804:2616880 [1] NCCL INFO Channel 00/0 : 52[7000] -> 57[46000] [receive] via NET/IB/0 +gpub079:2616804:2616880 [1] NCCL INFO Channel 00/0 : 57[46000] -> 52[7000] [send] via NET/IB/0 +gpub079:2616804:2616880 [1] NCCL INFO Channel 00/0 : 57[46000] -> 56[7000] via P2P/IPC +gpub079:2616804:2616880 [1] NCCL INFO Channel 01/0 : 57[46000] -> 56[7000] via P2P/IPC +gpub079:2616804:2616880 [1] NCCL INFO Connected all trees +gpub079:2616804:2616880 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub079:2616804:2616880 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels 
per peer +gpub079:2616804:2616880 [1] NCCL INFO comm 0x9014adc0 rank 57 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub079:2616805:2616805 [2] NCCL INFO cudaDriverVersion 12010 +gpub079:2616805:2616805 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.179<0> +gpub079:2616805:2616805 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub079:2616805:2616882 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.179<0> +gpub079:2616805:2616882 [2] NCCL INFO Using network IB +gpub079:2616805:2616882 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub079:2616805:2616882 [2] NCCL INFO Trees [0] 59/-1/-1->58->57 [1] 59/-1/-1->58->57 +gpub079:2616805:2616882 [2] NCCL INFO Channel 00/0 : 58[85000] -> 59[c7000] via P2P/IPC +gpub079:2616805:2616882 [2] NCCL INFO Channel 01/0 : 58[85000] -> 59[c7000] via P2P/IPC +gpub079:2616805:2616882 [2] NCCL INFO Connected all rings +gpub079:2616805:2616882 [2] NCCL INFO Channel 00/0 : 58[85000] -> 57[46000] via P2P/IPC +gpub079:2616805:2616882 [2] NCCL INFO Channel 01/0 : 58[85000] -> 57[46000] via P2P/IPC +gpub079:2616805:2616882 [2] NCCL INFO Connected all trees +gpub079:2616805:2616882 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub079:2616805:2616882 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub079:2616805:2616882 [2] NCCL INFO comm 0x8b2c9c20 rank 58 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub060:1938146:1938146 [3] NCCL INFO cudaDriverVersion 12010 +gpub060:1938146:1938146 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.160<0> +gpub060:1938146:1938146 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub060:1938146:1938220 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.160<0> +gpub060:1938146:1938220 [3] NCCL INFO Using network IB +gpub060:1938146:1938220 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub060:1938146:1938220 [3] NCCL INFO Trees [0] -1/-1/-1->39->38 [1] -1/-1/-1->39->38 +gpub060:1938146:1938220 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0 +gpub060:1938146:1938220 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 40[7000] [send] via NET/IB/0 +gpub060:1938146:1938220 [3] NCCL INFO Connected all rings +gpub060:1938146:1938220 [3] NCCL INFO Channel 00/0 : 39[c7000] -> 38[85000] via P2P/IPC +gpub060:1938146:1938220 [3] NCCL INFO Channel 01/0 : 39[c7000] -> 38[85000] via P2P/IPC +gpub060:1938146:1938220 [3] NCCL INFO Connected all trees +gpub060:1938146:1938220 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub060:1938146:1938220 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub060:1938146:1938220 [3] NCCL INFO comm 0x50addeb0 rank 39 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub016:1380822:1380822 [1] NCCL INFO cudaDriverVersion 12010 +gpub016:1380822:1380822 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.116<0> +gpub016:1380822:1380822 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub016:1380822:1380898 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.116<0> +gpub016:1380822:1380898 [1] NCCL INFO Using network IB +gpub016:1380822:1380898 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub016:1380822:1380898 [1] NCCL INFO Trees [0] 14/-1/-1->13->12 [1] 14/20/-1->13->12 +gpub016:1380822:1380898 [1] NCCL INFO Channel 00/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub016:1380822:1380898 [1] NCCL INFO Channel 
01/0 : 13[46000] -> 14[85000] via P2P/IPC +gpub016:1380822:1380898 [1] NCCL INFO Connected all rings +gpub016:1380822:1380898 [1] NCCL INFO Channel 01/0 : 13[46000] -> 20[7000] [send] via NET/IB/0 +gpub016:1380822:1380898 [1] NCCL INFO Channel 01/0 : 20[7000] -> 13[46000] [receive] via NET/IB/0 +gpub016:1380822:1380898 [1] NCCL INFO Channel 00/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub016:1380822:1380898 [1] NCCL INFO Channel 01/0 : 13[46000] -> 12[7000] via P2P/IPC +gpub016:1380822:1380898 [1] NCCL INFO Connected all trees +gpub016:1380822:1380898 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub016:1380822:1380898 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub016:1380822:1380898 [1] NCCL INFO comm 0x9b8bb7a0 rank 13 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub066:1432047:1432047 [2] NCCL INFO cudaDriverVersion 12010 +gpub066:1432047:1432047 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.166<0> +gpub066:1432047:1432047 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub066:1432047:1432127 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.166<0> +gpub066:1432047:1432127 [2] NCCL INFO Using network IB +gpub066:1432047:1432127 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub066:1432047:1432127 [2] NCCL INFO Trees [0] 43/-1/-1->42->41 [1] 43/-1/-1->42->41 +gpub066:1432047:1432127 [2] NCCL INFO Channel 00/0 : 42[85000] -> 43[c7000] via P2P/IPC +gpub066:1432047:1432127 [2] NCCL INFO Channel 01/0 : 42[85000] -> 43[c7000] via P2P/IPC +gpub066:1432047:1432127 [2] NCCL INFO Connected all rings +gpub066:1432047:1432127 [2] NCCL INFO Channel 00/0 : 42[85000] -> 41[46000] via P2P/IPC +gpub066:1432047:1432127 [2] NCCL INFO Channel 01/0 : 42[85000] -> 41[46000] via P2P/IPC +gpub066:1432047:1432127 [2] NCCL INFO Connected all trees +gpub066:1432047:1432127 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub066:1432047:1432127 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub066:1432047:1432127 [2] NCCL INFO comm 0x9ed0150 rank 42 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub030:2310658:2310658 [1] NCCL INFO cudaDriverVersion 12010 +gpub030:2310658:2310658 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.130<0> +gpub030:2310658:2310658 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub030:2310658:2310725 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.130<0> +gpub030:2310658:2310725 [1] NCCL INFO Using network IB +gpub030:2310658:2310725 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub030:2310658:2310725 [1] NCCL INFO Trees [0] 22/-1/-1->21->20 [1] 22/24/-1->21->20 +gpub030:2310658:2310725 [1] NCCL INFO Channel 00/0 : 21[46000] -> 22[85000] via P2P/IPC +gpub030:2310658:2310725 [1] NCCL INFO Channel 01/0 : 21[46000] -> 22[85000] via P2P/IPC +gpub030:2310658:2310725 [1] NCCL INFO Connected all rings +gpub030:2310658:2310725 [1] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [send] via NET/IB/0 +gpub030:2310658:2310725 [1] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [receive] via NET/IB/0 +gpub030:2310658:2310725 [1] NCCL INFO Channel 00/0 : 21[46000] -> 20[7000] via P2P/IPC +gpub030:2310658:2310725 [1] NCCL INFO Channel 01/0 : 21[46000] -> 20[7000] via P2P/IPC +gpub030:2310658:2310725 [1] NCCL INFO Connected all trees +gpub030:2310658:2310725 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub030:2310658:2310725 [1] NCCL INFO 2 
coll channels, 2 p2p channels, 2 p2p channels per peer +gpub030:2310658:2310725 [1] NCCL INFO comm 0x50672d50 rank 21 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub059:1894383:1894383 [0] NCCL INFO cudaDriverVersion 12010 +gpub059:1894383:1894383 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.159<0> +gpub059:1894383:1894383 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub059:1894383:1894458 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.159<0> +gpub059:1894383:1894458 [0] NCCL INFO Using network IB +gpub059:1894383:1894458 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub059:1894383:1894458 [0] NCCL INFO Trees [0] 33/48/-1->32->0 [1] 33/-1/-1->32->36 +gpub059:1894383:1894458 [0] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0 +gpub059:1894383:1894458 [0] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [receive] via NET/IB/0 +gpub059:1894383:1894458 [0] NCCL INFO Channel 00/0 : 32[7000] -> 33[46000] via P2P/IPC +gpub059:1894383:1894458 [0] NCCL INFO Channel 01/0 : 32[7000] -> 33[46000] via P2P/IPC +gpub059:1894383:1894458 [0] NCCL INFO Connected all rings +gpub059:1894383:1894458 [0] NCCL INFO Channel 01/0 : 32[7000] -> 36[7000] [send] via NET/IB/0 +gpub059:1894383:1894458 [0] NCCL INFO Channel 00/0 : 32[7000] -> 48[7000] [send] via NET/IB/0 +gpub059:1894383:1894458 [0] NCCL INFO Channel 00/0 : 0[7000] -> 32[7000] [receive] via NET/IB/0 +gpub059:1894383:1894458 [0] NCCL INFO Channel 00/0 : 32[7000] -> 0[7000] [send] via NET/IB/0 +gpub059:1894383:1894458 [0] NCCL INFO Channel 00/0 : 48[7000] -> 32[7000] [receive] via NET/IB/0 +gpub059:1894383:1894458 [0] NCCL INFO Channel 01/0 : 36[7000] -> 32[7000] [receive] via NET/IB/0 +gpub059:1894383:1894458 [0] NCCL INFO Connected all trees +gpub059:1894383:1894458 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub059:1894383:1894458 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub059:1894383:1894458 [0] NCCL INFO comm 0x510467d0 rank 32 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub059:1894386:1894386 [3] NCCL INFO cudaDriverVersion 12010 +gpub059:1894386:1894386 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.159<0> +gpub059:1894386:1894386 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub059:1894386:1894456 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.159<0> +gpub059:1894386:1894456 [3] NCCL INFO Using network IB +gpub059:1894386:1894456 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub059:1894386:1894456 [3] NCCL INFO Trees [0] -1/-1/-1->35->34 [1] -1/-1/-1->35->34 +gpub059:1894386:1894456 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0 +gpub059:1894386:1894456 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 36[7000] [send] via NET/IB/0 +gpub059:1894386:1894456 [3] NCCL INFO Connected all rings +gpub059:1894386:1894456 [3] NCCL INFO Channel 00/0 : 35[c7000] -> 34[85000] via P2P/IPC +gpub059:1894386:1894456 [3] NCCL INFO Channel 01/0 : 35[c7000] -> 34[85000] via P2P/IPC +gpub059:1894386:1894456 [3] NCCL INFO Connected all trees +gpub059:1894386:1894456 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub059:1894386:1894456 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub059:1894386:1894456 [3] NCCL INFO comm 0x9cf1390 rank 35 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub059:1894385:1894385 [2] NCCL INFO cudaDriverVersion 12010 
+gpub059:1894385:1894385 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.159<0> +gpub059:1894385:1894385 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub059:1894385:1894457 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.159<0> +gpub059:1894385:1894457 [2] NCCL INFO Using network IB +gpub059:1894385:1894457 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000 +gpub059:1894385:1894457 [2] NCCL INFO Trees [0] 35/-1/-1->34->33 [1] 35/-1/-1->34->33 +gpub059:1894385:1894457 [2] NCCL INFO Channel 00/0 : 34[85000] -> 35[c7000] via P2P/IPC +gpub059:1894385:1894457 [2] NCCL INFO Channel 01/0 : 34[85000] -> 35[c7000] via P2P/IPC +gpub059:1894385:1894457 [2] NCCL INFO Connected all rings +gpub059:1894385:1894457 [2] NCCL INFO Channel 00/0 : 34[85000] -> 33[46000] via P2P/IPC +gpub059:1894385:1894457 [2] NCCL INFO Channel 01/0 : 34[85000] -> 33[46000] via P2P/IPC +gpub059:1894385:1894457 [2] NCCL INFO Connected all trees +gpub059:1894385:1894457 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub059:1894385:1894457 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub059:1894385:1894457 [2] NCCL INFO comm 0x50af3510 rank 34 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub030:2310660:2310660 [3] NCCL INFO cudaDriverVersion 12010 +gpub030:2310660:2310660 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.130<0> +gpub030:2310660:2310660 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub030:2310660:2310727 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.130<0> +gpub030:2310660:2310727 [3] NCCL INFO Using network IB +gpub030:2310660:2310727 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub030:2310660:2310727 [3] NCCL INFO Trees [0] -1/-1/-1->23->22 [1] -1/-1/-1->23->22 +gpub030:2310660:2310727 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0 +gpub030:2310660:2310727 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [send] via NET/IB/0 +gpub030:2310660:2310727 [3] NCCL INFO Connected all rings +gpub030:2310660:2310727 [3] NCCL INFO Channel 00/0 : 23[c7000] -> 22[85000] via P2P/IPC +gpub030:2310660:2310727 [3] NCCL INFO Channel 01/0 : 23[c7000] -> 22[85000] via P2P/IPC +gpub030:2310660:2310727 [3] NCCL INFO Connected all trees +gpub030:2310660:2310727 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub030:2310660:2310727 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub030:2310660:2310727 [3] NCCL INFO comm 0xa84d3a10 rank 23 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub031:1878312:1878312 [1] NCCL INFO cudaDriverVersion 12010 +gpub031:1878312:1878312 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.131<0> +gpub031:1878312:1878312 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub031:1878312:1878390 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.131<0> +gpub031:1878312:1878390 [1] NCCL INFO Using network IB +gpub031:1878312:1878390 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub031:1878312:1878390 [1] NCCL INFO Trees [0] 26/20/-1->25->24 [1] 26/-1/-1->25->24 +gpub031:1878312:1878390 [1] NCCL INFO Channel 00/0 : 25[46000] -> 26[85000] via P2P/IPC +gpub031:1878312:1878390 [1] NCCL INFO Channel 01/0 : 25[46000] -> 26[85000] via P2P/IPC +gpub031:1878312:1878390 [1] NCCL INFO Connected all rings +gpub031:1878312:1878390 [1] NCCL INFO Channel 00/0 : 20[7000] -> 25[46000] [receive] 
via NET/IB/0 +gpub031:1878312:1878390 [1] NCCL INFO Channel 00/0 : 25[46000] -> 20[7000] [send] via NET/IB/0 +gpub031:1878312:1878390 [1] NCCL INFO Channel 00/0 : 25[46000] -> 24[7000] via P2P/IPC +gpub031:1878312:1878390 [1] NCCL INFO Channel 01/0 : 25[46000] -> 24[7000] via P2P/IPC +gpub031:1878312:1878390 [1] NCCL INFO Connected all trees +gpub031:1878312:1878390 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub031:1878312:1878390 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub031:1878312:1878390 [1] NCCL INFO comm 0x509faf60 rank 25 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE +gpub032:3246895:3246895 [3] NCCL INFO cudaDriverVersion 12010 +gpub032:3246895:3246895 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.132<0> +gpub032:3246895:3246895 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub032:3246895:3246973 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.132<0> +gpub032:3246895:3246973 [3] NCCL INFO Using network IB +gpub032:3246895:3246973 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub032:3246895:3246973 [3] NCCL INFO Trees [0] -1/-1/-1->31->30 [1] -1/-1/-1->31->30 +gpub032:3246895:3246973 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0 +gpub032:3246895:3246973 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 32[7000] [send] via NET/IB/0 +gpub032:3246895:3246973 [3] NCCL INFO Connected all rings +gpub032:3246895:3246973 [3] NCCL INFO Channel 00/0 : 31[c7000] -> 30[85000] via P2P/IPC +gpub032:3246895:3246973 [3] NCCL INFO Channel 01/0 : 31[c7000] -> 30[85000] via P2P/IPC +gpub032:3246895:3246973 [3] NCCL INFO Connected all trees +gpub032:3246895:3246973 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub032:3246895:3246973 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub032:3246895:3246973 [3] NCCL INFO comm 0x1b5e5670 rank 31 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub077:252895:252895 [3] NCCL INFO cudaDriverVersion 12010 +gpub077:252895:252895 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.177<0> +gpub077:252895:252895 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub077:252895:252963 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.177<0> +gpub077:252895:252963 [3] NCCL INFO Using network IB +gpub077:252895:252963 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub077:252895:252963 [3] NCCL INFO Trees [0] -1/-1/-1->55->54 [1] -1/-1/-1->55->54 +gpub077:252895:252963 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0 +gpub077:252895:252963 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 56[7000] [send] via NET/IB/0 +gpub077:252895:252963 [3] NCCL INFO Connected all rings +gpub077:252895:252963 [3] NCCL INFO Channel 00/0 : 55[c7000] -> 54[85000] via P2P/IPC +gpub077:252895:252963 [3] NCCL INFO Channel 01/0 : 55[c7000] -> 54[85000] via P2P/IPC +gpub077:252895:252963 [3] NCCL INFO Connected all trees +gpub077:252895:252963 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub077:252895:252963 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub077:252895:252963 [3] NCCL INFO comm 0x9491900 rank 55 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub031:1878311:1878311 [0] NCCL INFO cudaDriverVersion 12010 +gpub031:1878311:1878311 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.131<0> +gpub031:1878311:1878311 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), 
using internal implementation +gpub031:1878311:1878392 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.131<0> +gpub031:1878311:1878392 [0] NCCL INFO Using network IB +gpub031:1878311:1878392 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub031:1878311:1878392 [0] NCCL INFO Trees [0] 25/28/-1->24->16 [1] 25/-1/-1->24->21 +gpub031:1878311:1878392 [0] NCCL INFO Channel 00/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0 +gpub031:1878311:1878392 [0] NCCL INFO Channel 01/0 : 23[c7000] -> 24[7000] [receive] via NET/IB/0 +gpub031:1878311:1878392 [0] NCCL INFO Channel 00/0 : 24[7000] -> 25[46000] via P2P/IPC +gpub031:1878311:1878392 [0] NCCL INFO Channel 01/0 : 24[7000] -> 25[46000] via P2P/IPC +gpub031:1878311:1878392 [0] NCCL INFO Connected all rings +gpub031:1878311:1878392 [0] NCCL INFO Channel 01/0 : 21[46000] -> 24[7000] [receive] via NET/IB/0 +gpub031:1878311:1878392 [0] NCCL INFO Channel 00/0 : 24[7000] -> 28[7000] [send] via NET/IB/0 +gpub031:1878311:1878392 [0] NCCL INFO Channel 00/0 : 16[7000] -> 24[7000] [receive] via NET/IB/0 +gpub031:1878311:1878392 [0] NCCL INFO Channel 00/0 : 24[7000] -> 16[7000] [send] via NET/IB/0 +gpub031:1878311:1878392 [0] NCCL INFO Channel 00/0 : 28[7000] -> 24[7000] [receive] via NET/IB/0 +gpub031:1878311:1878392 [0] NCCL INFO Channel 01/0 : 24[7000] -> 21[46000] [send] via NET/IB/0 +gpub031:1878311:1878392 [0] NCCL INFO Connected all trees +gpub031:1878311:1878392 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub031:1878311:1878392 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub031:1878311:1878392 [0] NCCL INFO comm 0xba515710 rank 24 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub096:1440104:1440104 [3] NCCL INFO cudaDriverVersion 12010 +gpub096:1440104:1440104 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.196<0> +gpub096:1440104:1440104 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub096:1440104:1440176 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.196<0> +gpub096:1440104:1440176 [3] NCCL INFO Using network IB +gpub096:1440104:1440176 [3] NCCL INFO Setting affinity for GPU 3 to ffff +gpub096:1440104:1440176 [3] NCCL INFO Trees [0] -1/-1/-1->63->62 [1] -1/-1/-1->63->62 +gpub096:1440104:1440176 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0 +gpub096:1440104:1440176 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 0[7000] [send] via NET/IB/0 +gpub096:1440104:1440176 [3] NCCL INFO Connected all rings +gpub096:1440104:1440176 [3] NCCL INFO Channel 00/0 : 63[c7000] -> 62[85000] via P2P/IPC +gpub096:1440104:1440176 [3] NCCL INFO Channel 01/0 : 63[c7000] -> 62[85000] via P2P/IPC +gpub096:1440104:1440176 [3] NCCL INFO Connected all trees +gpub096:1440104:1440176 [3] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub096:1440104:1440176 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub096:1440104:1440176 [3] NCCL INFO comm 0x9f265ce0 rank 63 nranks 64 cudaDev 3 busId c7000 - Init COMPLETE +gpub096:1440103:1440103 [2] NCCL INFO cudaDriverVersion 12010 +gpub096:1440103:1440103 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.196<0> +gpub096:1440103:1440103 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub096:1440103:1440178 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.196<0> +gpub096:1440103:1440178 [2] NCCL INFO Using network IB +gpub096:1440103:1440178 [2] NCCL INFO Setting 
affinity for GPU 2 to ffff0000 +gpub096:1440103:1440178 [2] NCCL INFO Trees [0] 63/-1/-1->62->61 [1] 63/-1/-1->62->61 +gpub096:1440103:1440178 [2] NCCL INFO Channel 00/0 : 62[85000] -> 63[c7000] via P2P/IPC +gpub096:1440103:1440178 [2] NCCL INFO Channel 01/0 : 62[85000] -> 63[c7000] via P2P/IPC +gpub096:1440103:1440178 [2] NCCL INFO Connected all rings +gpub096:1440103:1440178 [2] NCCL INFO Channel 00/0 : 62[85000] -> 61[46000] via P2P/IPC +gpub096:1440103:1440178 [2] NCCL INFO Channel 01/0 : 62[85000] -> 61[46000] via P2P/IPC +gpub096:1440103:1440178 [2] NCCL INFO Connected all trees +gpub096:1440103:1440178 [2] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub096:1440103:1440178 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub096:1440103:1440178 [2] NCCL INFO comm 0x91c6060 rank 62 nranks 64 cudaDev 2 busId 85000 - Init COMPLETE +gpub096:1440101:1440101 [0] NCCL INFO cudaDriverVersion 12010 +gpub096:1440101:1440101 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.196<0> +gpub096:1440101:1440101 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub096:1440101:1440177 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.196<0> +gpub096:1440101:1440177 [0] NCCL INFO Using network IB +gpub096:1440101:1440177 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000 +gpub096:1440101:1440177 [0] NCCL INFO Trees [0] 61/-1/-1->60->56 [1] 61/28/-1->60->-1 +gpub096:1440101:1440177 [0] NCCL INFO Channel 00/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0 +gpub096:1440101:1440177 [0] NCCL INFO Channel 01/0 : 59[c7000] -> 60[7000] [receive] via NET/IB/0 +gpub096:1440101:1440177 [0] NCCL INFO Channel 00/0 : 60[7000] -> 61[46000] via P2P/IPC +gpub096:1440101:1440177 [0] NCCL INFO Channel 01/0 : 60[7000] -> 61[46000] via P2P/IPC +gpub096:1440101:1440177 [0] NCCL INFO Connected all rings +gpub096:1440101:1440177 [0] NCCL INFO Channel 00/0 : 56[7000] -> 60[7000] [receive] via NET/IB/0 +gpub096:1440101:1440177 [0] NCCL INFO Channel 01/0 : 28[7000] -> 60[7000] [receive] via NET/IB/0 +gpub096:1440101:1440177 [0] NCCL INFO Channel 01/0 : 60[7000] -> 28[7000] [send] via NET/IB/0 +gpub096:1440101:1440177 [0] NCCL INFO Channel 00/0 : 60[7000] -> 56[7000] [send] via NET/IB/0 +gpub096:1440101:1440177 [0] NCCL INFO Connected all trees +gpub096:1440101:1440177 [0] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512 +gpub096:1440101:1440177 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer +gpub096:1440101:1440177 [0] NCCL INFO comm 0x50b020d0 rank 60 nranks 64 cudaDev 0 busId 7000 - Init COMPLETE +gpub096:1440102:1440102 [1] NCCL INFO cudaDriverVersion 12010 +gpub096:1440102:1440102 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.196<0> +gpub096:1440102:1440102 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation +gpub096:1440102:1440179 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.196<0> +gpub096:1440102:1440179 [1] NCCL INFO Using network IB +gpub096:1440102:1440179 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000 +gpub096:1440102:1440179 [1] NCCL INFO Trees [0] 62/-1/-1->61->60 [1] 62/-1/-1->61->60 +gpub096:1440102:1440179 [1] NCCL INFO Channel 00/0 : 61[46000] -> 62[85000] via P2P/IPC +gpub096:1440102:1440179 [1] NCCL INFO Channel 01/0 : 61[46000] -> 62[85000] via P2P/IPC +gpub096:1440102:1440179 [1] NCCL INFO Connected all rings +gpub096:1440102:1440179 [1] NCCL INFO Channel 00/0 : 61[46000] -> 60[7000] via 
P2P/IPC
+gpub096:1440102:1440179 [1] NCCL INFO Channel 01/0 : 61[46000] -> 60[7000] via P2P/IPC
+gpub096:1440102:1440179 [1] NCCL INFO Connected all trees
+gpub096:1440102:1440179 [1] NCCL INFO threadThresholds 8/8/64 | 512/8/64 | 512 | 512
+gpub096:1440102:1440179 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
+gpub096:1440102:1440179 [1] NCCL INFO comm 0x50d96930 rank 61 nranks 64 cudaDev 1 busId 46000 - Init COMPLETE
+[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator())
(function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. 
If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. 
(function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. 
If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. 
(function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. 
If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. 
(function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. (function operator()) +[W reducer.cpp:1298] Warning: find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass. This flag results in an extra traversal of the autograd graph every iteration, which can adversely affect performance. If your model indeed never has any unused parameters in the forward pass, consider turning this flag off. Note that this warning may be a false positive if your model has flow control causing later iterations to have unused parameters. 
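The reducer warning above has a direct remedy on the application side: when every parameter reliably receives a gradient in each forward/backward pass, DistributedDataParallel can be constructed with find_unused_parameters=False to skip the extra autograd-graph traversal it describes. The sketch below is a minimal, hypothetical illustration of that constructor argument with a placeholder model, not the ESPnet trainer's actual wiring (ESPnet governs this through its own training options).

import os

import torch
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel as DDP

# Placeholder model standing in for the S2T network.
model = torch.nn.Linear(80, 256)

# Process-group setup as a distributed launcher would provide it
# (env:// rendezvous variables assumed to be set by srun/torchrun).
dist.init_process_group(backend="nccl")
local_rank = int(os.environ.get("LOCAL_RANK", "0"))
torch.cuda.set_device(local_rank)
model = model.cuda(local_rank)

# With no conditionally skipped parameters, find_unused_parameters=False
# avoids the per-iteration graph traversal the warning complains about.
ddp_model = DDP(model, device_ids=[local_rank], find_unused_parameters=False)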
+[gpub001:0/64] 2023-07-03 22:35:20,343 (trainer:732) INFO: 9epoch:train:1-100batch: iter_time=1.479, forward_time=0.254, loss_ctc=89.671, loss_att=65.418, acc=0.668, loss=72.694, backward_time=1.040, grad_norm=92.761, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.182, optim0_lr0=1.336e-04, train_time=8.067
+[gpub001:0/64] 2023-07-03 22:37:35,916 (trainer:732) INFO: 9epoch:train:101-200batch: iter_time=1.205e-04, forward_time=0.142, loss_ctc=77.074, loss_att=59.177, acc=0.644, loss=64.546, backward_time=1.024, grad_norm=98.231, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.181, optim0_lr0=1.335e-04, train_time=2.714
+[gpub001:0/64] 2023-07-03 22:39:51,736 (trainer:732) INFO: 9epoch:train:201-300batch: iter_time=1.193e-04, forward_time=0.141, loss_ctc=83.406, loss_att=66.282, acc=0.665, loss=71.419, backward_time=1.025, grad_norm=105.432, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.181, optim0_lr0=1.334e-04, train_time=2.716
+[gpub001:0/64] 2023-07-03 22:42:06,607 (trainer:732) INFO: 9epoch:train:301-400batch: iter_time=1.219e-04, forward_time=0.142, loss_ctc=70.602, loss_att=53.997, acc=0.653, loss=58.979, backward_time=1.022, grad_norm=85.786, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.181, optim0_lr0=1.333e-04, train_time=2.697
+[gpub001:0/64] 2023-07-03 22:44:49,865 (trainer:732) INFO: 9epoch:train:401-500batch: iter_time=1.219e-04, forward_time=0.142, loss_ctc=84.185, loss_att=67.677, acc=0.649, loss=72.629, backward_time=1.077, grad_norm=101.589, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.181, optim0_lr0=1.332e-04, train_time=3.265
+[gpub001:0/64] 2023-07-03 22:47:04,832 (trainer:732) INFO: 9epoch:train:501-600batch: iter_time=1.129e-04, forward_time=0.140, loss_ctc=77.007, loss_att=63.663, acc=0.649, loss=67.666, backward_time=1.021, grad_norm=119.077, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.181, optim0_lr0=1.331e-04, train_time=2.699
+[gpub001:0/64] 2023-07-03 22:49:20,582 (trainer:732) INFO: 9epoch:train:601-700batch: iter_time=1.158e-04, forward_time=0.142, loss_ctc=69.128, loss_att=50.937, acc=0.670, loss=56.394, backward_time=1.023, grad_norm=85.221, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.181, optim0_lr0=1.330e-04, train_time=2.715
+[gpub001:0/64] 2023-07-03 22:51:38,011 (trainer:732) INFO: 9epoch:train:701-800batch: iter_time=1.240e-04, forward_time=0.142, loss_ctc=87.200, loss_att=70.450, acc=0.661, loss=75.475, backward_time=1.023, grad_norm=107.449, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.181, optim0_lr0=1.329e-04, train_time=2.748
+[gpub001:0/64] 2023-07-03 22:54:40,138 (trainer:732) INFO: 9epoch:train:801-900batch: iter_time=1.192e-04, forward_time=0.142, loss_ctc=84.642, loss_att=62.734, acc=0.671, loss=69.306, backward_time=1.081, grad_norm=87.657, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.181, optim0_lr0=1.328e-04, train_time=3.642
+[gpub001:0/64] 2023-07-03 22:57:27,498 (trainer:732) INFO: 9epoch:train:901-1000batch: iter_time=1.098e-04, forward_time=0.141, loss_ctc=84.316, loss_att=59.445, acc=0.671, loss=66.906, backward_time=1.063, grad_norm=104.559, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.181, optim0_lr0=1.327e-04, train_time=3.347
+[gpub001:0/64] 2023-07-03 22:57:41,932 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
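One relationship worth making explicit in the trainer lines above: the reported loss is a weighted combination of loss_ctc and loss_att, and the logged numbers are consistent with a CTC weight of 0.3 (for the 1-100batch block, 0.3 * 89.671 + 0.7 * 65.418 = 72.694, matching loss=72.694). The weight is inferred here from the log, not read from the training config, so the check below is an assumption-labeled sanity test.

# CTC weight inferred from the logged values; the authoritative number
# lives in the training config, not in this sketch.
CTC_WEIGHT = 0.3

def combined_loss(loss_ctc: float, loss_att: float, w: float = CTC_WEIGHT) -> float:
    # Interpolation that the trainer lines appear to report as "loss=".
    return w * loss_ctc + (1.0 - w) * loss_att

# First block of epoch 9 from the log: loss_ctc=89.671, loss_att=65.418.
assert abs(combined_loss(89.671, 65.418) - 72.694) < 1e-3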
+[gpub001:0/64] 2023-07-03 22:58:04,079 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-03 22:58:08,258 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.4", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-03 22:58:08,259 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.4,
+[gpub001:0/64] 2023-07-03 22:58:08,266 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-03 23:04:34,809 (trainer:732) INFO: 9epoch:train:1001-1100batch: iter_time=2.754, forward_time=0.180, loss_ctc=90.046, loss_att=65.751, acc=0.658, loss=73.039, backward_time=1.044, grad_norm=97.475, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.182, optim0_lr0=1.326e-04, train_time=8.546
+[gpub001:0/64] 2023-07-03 23:06:58,236 (trainer:732) INFO: 9epoch:train:1101-1200batch: iter_time=1.332e-04, forward_time=0.144, loss_ctc=77.169, loss_att=57.792, acc=0.640, loss=63.605, backward_time=1.031, grad_norm=84.645, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.182, optim0_lr0=1.325e-04, train_time=2.869
+[gpub001:0/64] 2023-07-03 23:09:20,632 (trainer:732) INFO: 9epoch:train:1201-1300batch: iter_time=1.286e-04, forward_time=0.144, loss_ctc=83.261, loss_att=65.116, acc=0.660, loss=70.559, backward_time=1.033, grad_norm=92.693, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.182, optim0_lr0=1.325e-04, train_time=2.848
+[gpub001:0/64] 2023-07-03 23:11:45,452 (trainer:732) INFO: 9epoch:train:1301-1400batch: iter_time=1.543e-04, forward_time=0.145, loss_ctc=69.216, loss_att=52.051, acc=0.651, loss=57.200, backward_time=1.030, grad_norm=85.186, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.182, optim0_lr0=1.324e-04, train_time=2.896
+[gpub001:0/64] 2023-07-03 23:14:11,669 (trainer:732) INFO: 9epoch:train:1401-1500batch: iter_time=1.219e-04, forward_time=0.145, loss_ctc=81.472, loss_att=65.882, acc=0.654, loss=70.559, backward_time=1.051, grad_norm=98.760, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.182, optim0_lr0=1.323e-04, train_time=2.924
+[gpub001:0/64] 2023-07-03 23:16:47,813 (trainer:732) INFO: 9epoch:train:1501-1600batch: iter_time=1.272e-04, forward_time=0.144, loss_ctc=75.580, loss_att=63.756, acc=0.646, loss=67.303, backward_time=1.053, grad_norm=90.582, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.182, optim0_lr0=1.322e-04, train_time=3.123
+[gpub001:0/64] 2023-07-03 23:19:18,487 (trainer:732) INFO: 9epoch:train:1601-1700batch: iter_time=1.166e-04, forward_time=0.144, loss_ctc=70.019, loss_att=51.653, acc=0.666, loss=57.163, backward_time=1.035, grad_norm=88.067, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.181, optim0_lr0=1.321e-04, train_time=3.013
+[gpub001:0/64] 2023-07-03 23:22:07,029 (trainer:732) INFO: 9epoch:train:1701-1800batch: iter_time=1.322e-04, forward_time=0.145, loss_ctc=83.559, loss_att=68.890, acc=0.660, loss=73.291, backward_time=1.058, grad_norm=93.644, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.182, optim0_lr0=1.320e-04, train_time=3.371
+[gpub001:0/64] 2023-07-03 23:24:43,521 (trainer:732) INFO: 9epoch:train:1801-1900batch: iter_time=1.216e-04, forward_time=0.146, loss_ctc=82.347, loss_att=62.197, acc=0.665, loss=68.242, backward_time=1.071, grad_norm=81.502, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.181, optim0_lr0=1.319e-04, train_time=3.130
+[gpub001:0/64] 2023-07-03 23:27:16,221 (trainer:732) INFO: 9epoch:train:1901-2000batch: iter_time=1.205e-04, forward_time=0.145, loss_ctc=79.689, loss_att=57.173, acc=0.675, loss=63.928, backward_time=1.053, grad_norm=110.108, clip=100.000, loss_scale=8.590e+09, optim_step_time=0.181, optim0_lr0=1.318e-04, train_time=3.054
+[gpub001:0/64] 2023-07-03 23:27:18,258 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub001:0/64] 2023-07-03 23:27:40,440 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-03 23:27:44,994 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.1", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-03 23:27:44,994 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.1,
+[gpub001:0/64] 2023-07-03 23:27:45,002 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-03 23:33:19,007 (trainer:732) INFO: 9epoch:train:2001-2100batch: iter_time=1.581, forward_time=0.193, loss_ctc=87.970, loss_att=63.692, acc=0.678, loss=70.975, backward_time=1.046, grad_norm=89.931, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.192, optim0_lr0=1.317e-04, train_time=7.255
+[gpub001:0/64] 2023-07-03 23:35:35,590 (trainer:732) INFO: 9epoch:train:2101-2200batch: iter_time=1.287e-04, forward_time=0.145, loss_ctc=76.465, loss_att=57.377, acc=0.649, loss=63.104, backward_time=1.025, grad_norm=85.685, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.181, optim0_lr0=1.316e-04, train_time=2.732
+[gpub001:0/64] 2023-07-03 23:37:51,391 (trainer:732) INFO: 9epoch:train:2201-2300batch: iter_time=1.298e-04, forward_time=0.144, loss_ctc=83.755, loss_att=67.648, acc=0.667, loss=72.480, backward_time=1.025, grad_norm=95.612, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.181, optim0_lr0=1.315e-04, train_time=2.716
+[gpub001:0/64] 2023-07-03 23:40:06,439 (trainer:732) INFO: 9epoch:train:2301-2400batch: iter_time=1.220e-04, forward_time=0.144, loss_ctc=67.251, loss_att=51.411, acc=0.664, loss=56.163, backward_time=1.021, grad_norm=84.349, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.181, optim0_lr0=1.314e-04, train_time=2.701
+[gpub001:0/64] 2023-07-03 23:42:25,815 (trainer:732) INFO: 9epoch:train:2401-2500batch: iter_time=1.184e-04, forward_time=0.145, loss_ctc=81.714, loss_att=66.031, acc=0.656, loss=70.736, backward_time=1.030, grad_norm=98.459, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.182, optim0_lr0=1.313e-04, train_time=2.787
+[gpub001:0/64] 2023-07-03 23:44:44,239 (trainer:732) INFO: 9epoch:train:2501-2600batch: iter_time=1.231e-04, forward_time=0.145, loss_ctc=74.569, loss_att=61.337, acc=0.654, loss=65.307, backward_time=1.031, grad_norm=86.312, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.181, optim0_lr0=1.313e-04, train_time=2.768
+[gpub001:0/64] 2023-07-03 23:47:09,040 (trainer:732) INFO: 9epoch:train:2601-2700batch: iter_time=1.325e-04, forward_time=0.144, loss_ctc=70.692, loss_att=50.739, acc=0.672, loss=56.725, backward_time=1.034, grad_norm=94.955, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.181, optim0_lr0=1.312e-04, train_time=2.896
+[gpub001:0/64] 2023-07-03 23:49:46,981 (trainer:732) INFO: 9epoch:train:2701-2800batch: iter_time=1.268e-04, forward_time=0.146, loss_ctc=82.425, loss_att=66.291, acc=0.670, loss=71.131, backward_time=1.050, grad_norm=96.178, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.181, optim0_lr0=1.311e-04, train_time=3.159
+[gpub001:0/64] 2023-07-03 23:52:17,933 (trainer:732) INFO: 9epoch:train:2801-2900batch: iter_time=1.165e-04, forward_time=0.144, loss_ctc=82.507, loss_att=61.707, acc=0.676, loss=67.947, backward_time=1.069, grad_norm=78.205, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.182, optim0_lr0=1.310e-04, train_time=3.019
+[gpub001:0/64] 2023-07-03 23:55:02,472 (trainer:732) INFO: 9epoch:train:2901-3000batch: iter_time=1.113e-04, forward_time=0.145, loss_ctc=80.597, loss_att=57.983, acc=0.678, loss=64.767, backward_time=1.050, grad_norm=96.704, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.181, optim0_lr0=1.309e-04, train_time=3.291
+[gpub001:0/64] 2023-07-03 23:55:22,500 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub001:0/64] 2023-07-03 23:55:45,123 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-03 23:55:49,354 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.8", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-03 23:55:49,354 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.8,
+[gpub001:0/64] 2023-07-03 23:55:49,431 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-04 00:00:56,009 (trainer:732) INFO: 9epoch:train:3001-3100batch: iter_time=2.040, forward_time=0.188, loss_ctc=87.894, loss_att=64.088, acc=0.666, loss=71.230, backward_time=1.042, grad_norm=89.421, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.184, optim0_lr0=1.308e-04, train_time=7.070
+[gpub001:0/64] 2023-07-04 00:03:24,637 (trainer:732) INFO: 9epoch:train:3101-3200batch: iter_time=7.802e-04, forward_time=0.237, loss_ctc=73.542, loss_att=54.936, acc=0.650, loss=60.518, backward_time=1.038, grad_norm=83.280, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.190, optim0_lr0=1.307e-04, train_time=2.973
+[gpub001:0/64] 2023-07-04 00:05:45,751 (trainer:732) INFO: 9epoch:train:3201-3300batch: iter_time=1.097e-04, forward_time=0.168, loss_ctc=83.071, loss_att=63.854, acc=0.666, loss=69.619, backward_time=1.029, grad_norm=84.872, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.183, optim0_lr0=1.306e-04, train_time=2.820
+[gpub001:0/64] 2023-07-04 00:08:15,073 (trainer:732) INFO: 9epoch:train:3301-3400batch: iter_time=2.763e-04, forward_time=0.240, loss_ctc=67.308, loss_att=51.039, acc=0.655, loss=55.919, backward_time=1.042, grad_norm=76.085, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.187, optim0_lr0=1.305e-04, train_time=2.988
+[gpub001:0/64] 2023-07-04 00:10:38,777 (trainer:732) INFO: 9epoch:train:3401-3500batch: iter_time=1.431e-04, forward_time=0.181, loss_ctc=81.458, loss_att=64.858, acc=0.657, loss=69.838, backward_time=1.035, grad_norm=85.868, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.185, optim0_lr0=1.305e-04, train_time=2.873
+[gpub001:0/64] 2023-07-04 00:13:07,268 (trainer:732) INFO: 9epoch:train:3501-3600batch: iter_time=4.291e-04, forward_time=0.234, loss_ctc=73.981, loss_att=61.944, acc=0.651, loss=65.555, backward_time=1.036, grad_norm=95.691, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.186, optim0_lr0=1.304e-04, train_time=2.970
+[gpub001:0/64] 2023-07-04 00:15:26,106 (trainer:732) INFO: 9epoch:train:3601-3700batch: iter_time=1.187e-04, forward_time=0.168, loss_ctc=68.552, loss_att=50.700, acc=0.671, loss=56.056, backward_time=1.026, grad_norm=80.833, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.182, optim0_lr0=1.303e-04, train_time=2.775
+[gpub001:0/64] 2023-07-04 00:18:11,878 (trainer:732) INFO: 9epoch:train:3701-3800batch: iter_time=5.008e-04, forward_time=0.250, loss_ctc=81.535, loss_att=67.538, acc=0.661, loss=71.737, backward_time=1.059, grad_norm=101.130, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.188, optim0_lr0=1.302e-04, train_time=3.316
+[gpub001:0/64] 2023-07-04 00:20:43,658 (trainer:732) INFO: 9epoch:train:3801-3900batch: iter_time=5.966e-04, forward_time=0.156, loss_ctc=82.350, loss_att=61.685, acc=0.669, loss=67.885, backward_time=1.048, grad_norm=98.009, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.182, optim0_lr0=1.301e-04, train_time=3.035
+[gpub001:0/64] 2023-07-04 00:23:35,253 (trainer:732) INFO: 9epoch:train:3901-4000batch: iter_time=1.404e-04, forward_time=0.240, loss_ctc=78.814, loss_att=56.564, acc=0.678, loss=63.239, backward_time=1.090, grad_norm=90.393, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.185, optim0_lr0=1.300e-04, train_time=3.431
+[gpub001:0/64] 2023-07-04 00:23:55,438 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
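Each "Building Nth iter-factory..." block above follows the same pattern: the trainer opens one shard of the splits10 training data, dumps the ESPnetDataset file bindings, and builds an UnsortedBatchSampler over that shard's speech_shape key file, yielding 45593 mini-batches of 128-129 utterances (min=128, max=129 suggests the remainder is folded into some batches rather than emitting one short batch). The sketch below is a simplified stand-in for that grouping under exactly that assumption; it is not ESPnet's actual UnsortedBatchSampler.

from pathlib import Path

def unsorted_fixed_batches(key_file: str, batch_size: int = 128):
    # A shape/key file has one "uttid length..." entry per line; only the
    # utterance id matters for grouping here.
    keys = [line.split()[0] for line in Path(key_file).read_text().splitlines() if line]
    n_batch = max(1, len(keys) // batch_size)
    # Spread the remainder over the first batches, so every batch holds
    # batch_size or batch_size + 1 keys (the logged min=128 / max=129).
    base, extra = divmod(len(keys), n_batch)
    batches, start = [], 0
    for i in range(n_batch):
        size = base + (1 if i < extra else 0)
        batches.append(keys[start:start + size])
        start += size
    return batches

# Usage against one of the key files named in the log; under the assumption
# above, len(batches) should come out at the logged N-batch=45593.
# batches = unsorted_fixed_batches("exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.4")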
+[gpub001:0/64] 2023-07-04 00:24:17,575 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-04 00:24:21,806 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.7", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-04 00:24:21,806 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.7,
+[gpub001:0/64] 2023-07-04 00:24:21,829 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-04 00:31:27,733 (trainer:732) INFO: 9epoch:train:4001-4100batch: iter_time=2.183, forward_time=0.215, loss_ctc=87.545, loss_att=63.219, acc=0.679, loss=70.517, backward_time=1.043, grad_norm=95.106, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.186, optim0_lr0=1.299e-04, train_time=9.450
+[gpub001:0/64] 2023-07-04 00:33:43,494 (trainer:732) INFO: 9epoch:train:4101-4200batch: iter_time=1.178e-04, forward_time=0.145, loss_ctc=74.551, loss_att=56.168, acc=0.656, loss=61.683, backward_time=1.024, grad_norm=92.157, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.182, optim0_lr0=1.298e-04, train_time=2.715
+[gpub001:0/64] 2023-07-04 00:35:59,666 (trainer:732) INFO: 9epoch:train:4201-4300batch: iter_time=1.157e-04, forward_time=0.145, loss_ctc=82.064, loss_att=63.472, acc=0.678, loss=69.050, backward_time=1.028, grad_norm=98.221, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.182, optim0_lr0=1.297e-04, train_time=2.723
+[gpub001:0/64] 2023-07-04 00:38:15,095 (trainer:732) INFO: 9epoch:train:4301-4400batch: iter_time=1.075e-04, forward_time=0.143, loss_ctc=68.892, loss_att=51.913, acc=0.663, loss=57.007, backward_time=1.023, grad_norm=82.794, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.182, optim0_lr0=1.297e-04, train_time=2.708
+[gpub001:0/64] 2023-07-04 00:40:30,833 (trainer:732) INFO: 9epoch:train:4401-4500batch: iter_time=1.207e-04, forward_time=0.145, loss_ctc=82.133, loss_att=65.286, acc=0.663, loss=70.340, backward_time=1.027, grad_norm=97.104, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.182, optim0_lr0=1.296e-04, train_time=2.715
+[gpub001:0/64] 2023-07-04 00:42:58,614 (trainer:732) INFO: 9epoch:train:4501-4600batch: iter_time=1.234e-04, forward_time=0.144, loss_ctc=73.526, loss_att=61.357, acc=0.659, loss=65.008, backward_time=1.042, grad_norm=89.909, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.182, optim0_lr0=1.295e-04, train_time=2.955
+[gpub001:0/64] 2023-07-04 00:45:13,932 (trainer:732) INFO: 9epoch:train:4601-4700batch: iter_time=1.179e-04, forward_time=0.144, loss_ctc=69.403, loss_att=50.034, acc=0.676, loss=55.845, backward_time=1.023, grad_norm=93.331, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.182, optim0_lr0=1.294e-04, train_time=2.706
+[gpub001:0/64] 2023-07-04 00:47:42,926 (trainer:732) INFO: 9epoch:train:4701-4800batch: iter_time=1.269e-04, forward_time=0.145, loss_ctc=81.878, loss_att=66.373, acc=0.671, loss=71.025, backward_time=1.048, grad_norm=98.786, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.182, optim0_lr0=1.293e-04, train_time=2.980
+[gpub001:0/64] 2023-07-04 00:50:21,085 (trainer:732) INFO: 9epoch:train:4801-4900batch: iter_time=1.140e-04, forward_time=0.146, loss_ctc=83.277, loss_att=60.906, acc=0.679, loss=67.617, backward_time=1.047, grad_norm=96.167, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.182, optim0_lr0=1.292e-04, train_time=3.163
+[gpub001:0/64] 2023-07-04 00:52:52,099 (trainer:732) INFO: 9epoch:train:4901-5000batch: iter_time=1.094e-04, forward_time=0.146, loss_ctc=79.400, loss_att=56.855, acc=0.683, loss=63.619, backward_time=1.051, grad_norm=92.534, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.181, optim0_lr0=1.291e-04, train_time=3.020
+[gpub001:0/64] 2023-07-04 00:53:12,127 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub001:0/64] 2023-07-04 00:53:34,575 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-04 00:53:38,834 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.0", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-04 00:53:38,834 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.0,
+[gpub001:0/64] 2023-07-04 00:53:38,842 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-04 00:59:54,062 (trainer:732) INFO: 9epoch:train:5001-5100batch: iter_time=1.788, forward_time=0.204, loss_ctc=86.818, loss_att=63.626, acc=0.669, loss=70.584, backward_time=1.040, grad_norm=93.196, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.183, optim0_lr0=1.291e-04, train_time=8.438
+[gpub001:0/64] 2023-07-04 01:02:10,587 (trainer:732) INFO: 9epoch:train:5101-5200batch: iter_time=1.492e-04, forward_time=0.146, loss_ctc=76.113, loss_att=56.301, acc=0.651, loss=62.245, backward_time=1.026, grad_norm=83.102, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.183, optim0_lr0=1.290e-04, train_time=2.732
+[gpub001:0/64] 2023-07-04 01:04:27,185 (trainer:732) INFO: 9epoch:train:5201-5300batch: iter_time=1.206e-04, forward_time=0.144, loss_ctc=80.303, loss_att=62.833, acc=0.671, loss=68.074, backward_time=1.028, grad_norm=89.660, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.182, optim0_lr0=1.289e-04, train_time=2.732
+[gpub001:0/64] 2023-07-04 01:06:45,184 (trainer:732) INFO: 9epoch:train:5301-5400batch: iter_time=1.284e-04, forward_time=0.144, loss_ctc=65.878, loss_att=49.861, acc=0.667, loss=54.666, backward_time=1.025, grad_norm=73.201, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.182, optim0_lr0=1.288e-04, train_time=2.760
+[gpub001:0/64] 2023-07-04 01:09:00,951 (trainer:732) INFO: 9epoch:train:5401-5500batch: iter_time=1.160e-04, forward_time=0.144, loss_ctc=80.403, loss_att=63.695, acc=0.661, loss=68.708, backward_time=1.023, grad_norm=99.950, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.182, optim0_lr0=1.287e-04, train_time=2.715
+[gpub001:0/64] 2023-07-04 01:11:27,791 (trainer:732) INFO: 9epoch:train:5501-5600batch: iter_time=1.300e-04, forward_time=0.144, loss_ctc=73.122, loss_att=62.649, acc=0.653, loss=65.791, backward_time=1.033, grad_norm=94.019, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.182, optim0_lr0=1.286e-04, train_time=2.937
+[gpub001:0/64] 2023-07-04 01:13:52,865 (trainer:732) INFO: 9epoch:train:5601-5700batch: iter_time=1.200e-04, forward_time=0.144, loss_ctc=67.757, loss_att=50.360, acc=0.677, loss=55.579, backward_time=1.034, grad_norm=81.435, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.183, optim0_lr0=1.285e-04, train_time=2.901
+[gpub001:0/64] 2023-07-04 01:16:15,288 (trainer:732) INFO: 9epoch:train:5701-5800batch: iter_time=1.206e-04, forward_time=0.145, loss_ctc=80.702, loss_att=67.135, acc=0.662, loss=71.205, backward_time=1.033, grad_norm=92.187, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.182, optim0_lr0=1.285e-04, train_time=2.848
+[gpub001:0/64] 2023-07-04 01:19:17,300 (trainer:732) INFO: 9epoch:train:5801-5900batch: iter_time=1.355e-04, forward_time=0.146, loss_ctc=81.556, loss_att=61.325, acc=0.670, loss=67.394, backward_time=1.087, grad_norm=98.823, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.182, optim0_lr0=1.284e-04, train_time=3.640
+[gpub001:0/64] 2023-07-04 01:21:54,934 (trainer:732) INFO: 9epoch:train:5901-6000batch: iter_time=1.163e-04, forward_time=0.143, loss_ctc=78.794, loss_att=57.019, acc=0.673, loss=63.552, backward_time=1.039, grad_norm=110.341, clip=100.000, loss_scale=1.718e+10, optim_step_time=0.182, optim0_lr0=1.283e-04, train_time=3.152
+[gpub001:0/64] 2023-07-04 01:22:12,906 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub001:0/64] 2023-07-04 01:22:35,275 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-04 01:22:39,453 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.2", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-04 01:22:39,453 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.2,
+[gpub001:0/64] 2023-07-04 01:22:39,563 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-04 01:27:58,379 (trainer:732) INFO: 9epoch:train:6001-6100batch: iter_time=2.080, forward_time=0.171, loss_ctc=85.395, loss_att=61.689, acc=0.672, loss=68.801, backward_time=1.043, grad_norm=87.997, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.184, optim0_lr0=1.282e-04, train_time=7.268
+[gpub001:0/64] 2023-07-04 01:30:14,639 (trainer:732) INFO: 9epoch:train:6101-6200batch: iter_time=1.186e-04, forward_time=0.144, loss_ctc=73.105, loss_att=54.251, acc=0.658, loss=59.907, backward_time=1.024, grad_norm=81.029, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.281e-04, train_time=2.725
+[gpub001:0/64] 2023-07-04 01:32:30,963 (trainer:732) INFO: 9epoch:train:6201-6300batch: iter_time=1.238e-04, forward_time=0.144, loss_ctc=82.012, loss_att=61.684, acc=0.672, loss=67.782, backward_time=1.027, grad_norm=92.039, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.280e-04, train_time=2.726
+[gpub001:0/64] 2023-07-04 01:34:46,436 (trainer:732) INFO: 9epoch:train:6301-6400batch: iter_time=1.161e-04, forward_time=0.144, loss_ctc=68.673, loss_att=51.531, acc=0.660, loss=56.674, backward_time=1.022, grad_norm=84.864, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.280e-04, train_time=2.709
+[gpub001:0/64] 2023-07-04 01:37:02,196 (trainer:732) INFO: 9epoch:train:6401-6500batch: iter_time=1.343e-04, forward_time=0.146, loss_ctc=80.427, loss_att=63.759, acc=0.665, loss=68.760, backward_time=1.026, grad_norm=91.094, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.279e-04, train_time=2.715
+[gpub001:0/64] 2023-07-04 01:39:23,012 (trainer:732) INFO: 9epoch:train:6501-6600batch: iter_time=1.238e-04, forward_time=0.145, loss_ctc=71.841, loss_att=62.207, acc=0.654, loss=65.098, backward_time=1.028, grad_norm=91.719, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.278e-04, train_time=2.816
+[gpub001:0/64] 2023-07-04 01:41:50,816 (trainer:732) INFO: 9epoch:train:6601-6700batch: iter_time=1.201e-04, forward_time=0.145, loss_ctc=67.160, loss_att=49.622, acc=0.677, loss=54.883, backward_time=1.040, grad_norm=85.772, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.277e-04, train_time=2.956
+[gpub001:0/64] 2023-07-04 01:44:34,992 (trainer:732) INFO: 9epoch:train:6701-6800batch: iter_time=1.203e-04, forward_time=0.146, loss_ctc=80.981, loss_att=67.197, acc=0.664, loss=71.332, backward_time=1.065, grad_norm=96.825, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.276e-04, train_time=3.283
+[gpub001:0/64] 2023-07-04 01:47:19,271 (trainer:732) INFO: 9epoch:train:6801-6900batch: iter_time=1.257e-04, forward_time=0.145, loss_ctc=82.167, loss_att=60.632, acc=0.674, loss=67.092, backward_time=1.082, grad_norm=84.474, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.275e-04, train_time=3.285
+[gpub001:0/64] 2023-07-04 01:50:08,231 (trainer:732) INFO: 9epoch:train:6901-7000batch: iter_time=1.171e-04, forward_time=0.143, loss_ctc=79.723, loss_att=57.262, acc=0.677, loss=64.000, backward_time=1.133, grad_norm=106.009, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.275e-04, train_time=3.379
+[gpub001:0/64] 2023-07-04 01:50:26,658 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
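Since the per-100-batch trainer lines above are the main signal in this log, it can help to pull them into structured form (loss, acc, train_time per window) instead of reading them by eye. The parser below is a small, assumption-labeled helper written against the exact key=value format shown in this file; the field names such as loss_ctc and train_time are taken from the lines above.

import re

# One "key=value" metric, e.g. "loss_ctc=80.427" or "iter_time=1.343e-04".
PAIR = re.compile(r"(\w+)=([0-9.e+-]+)")
# Window header, e.g. "9epoch:train:6401-6500batch:".
HEADER = re.compile(r"(\d+)epoch:train:(\d+)-(\d+)batch:")

def parse_trainer_line(line: str):
    # Returns (epoch, first_batch, last_batch, metrics) for trainer lines,
    # or None for any other record in the log.
    m = HEADER.search(line)
    if m is None or "(trainer:" not in line:
        return None
    tail = line.split("batch:", 1)[1]
    metrics = {k: float(v) for k, v in PAIR.findall(tail)}
    return int(m.group(1)), int(m.group(2)), int(m.group(3)), metrics

line = ("[gpub001:0/64] 2023-07-04 01:37:02,196 (trainer:732) INFO: "
        "9epoch:train:6401-6500batch: iter_time=1.343e-04, forward_time=0.146, "
        "loss_ctc=80.427, loss_att=63.759, acc=0.665, loss=68.760")
print(parse_trainer_line(line))  # (9, 6401, 6500, {'iter_time': 0.0001343, ...})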
+[gpub001:0/64] 2023-07-04 01:50:49,043 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-04 01:50:53,273 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.5", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-04 01:50:53,273 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.5,
+[gpub001:0/64] 2023-07-04 01:50:53,280 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-04 01:56:44,529 (trainer:732) INFO: 9epoch:train:7001-7100batch: iter_time=1.794, forward_time=0.205, loss_ctc=85.101, loss_att=63.351, acc=0.681, loss=69.876, backward_time=1.054, grad_norm=94.225, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.274e-04, train_time=7.925
+[gpub001:0/64] 2023-07-04 01:59:09,417 (trainer:732) INFO: 9epoch:train:7101-7200batch: iter_time=1.088e-04, forward_time=0.145, loss_ctc=73.666, loss_att=55.956, acc=0.661, loss=61.269, backward_time=1.036, grad_norm=80.604, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.181, optim0_lr0=1.273e-04, train_time=2.898
+[gpub001:0/64] 2023-07-04 02:01:39,261 (trainer:732) INFO: 9epoch:train:7201-7300batch: iter_time=1.157e-04, forward_time=0.144, loss_ctc=81.381, loss_att=63.231, acc=0.677, loss=68.676, backward_time=1.040, grad_norm=84.281, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.181, optim0_lr0=1.272e-04, train_time=2.997
+[gpub001:0/64] 2023-07-04 02:03:59,902 (trainer:732) INFO: 9epoch:train:7301-7400batch: iter_time=1.094e-04, forward_time=0.144, loss_ctc=66.904, loss_att=50.560, acc=0.673, loss=55.463, backward_time=1.028, grad_norm=89.574, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.271e-04, train_time=2.813
+[gpub001:0/64] 2023-07-04 02:06:31,880 (trainer:732) INFO: 9epoch:train:7401-7500batch: iter_time=1.110e-04, forward_time=0.145, loss_ctc=79.781, loss_att=64.132, acc=0.667, loss=68.827, backward_time=1.043, grad_norm=81.963, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.270e-04, train_time=3.039
+[gpub001:0/64] 2023-07-04 02:09:19,491 (trainer:732) INFO: 9epoch:train:7501-7600batch: iter_time=1.151e-04, forward_time=0.143, loss_ctc=74.243, loss_att=62.088, acc=0.657, loss=65.734, backward_time=1.102, grad_norm=86.931, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.270e-04, train_time=3.352
+[gpub001:0/64] 2023-07-04 02:11:55,587 (trainer:732) INFO: 9epoch:train:7601-7700batch: iter_time=1.161e-04, forward_time=0.142, loss_ctc=67.305, loss_att=48.568, acc=0.681, loss=54.189, backward_time=1.068, grad_norm=80.772, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.181, optim0_lr0=1.269e-04, train_time=3.122
+[gpub001:0/64] 2023-07-04 02:14:33,924 (trainer:732) INFO: 9epoch:train:7701-7800batch: iter_time=1.073e-04, forward_time=0.143, loss_ctc=81.014, loss_att=66.986, acc=0.670, loss=71.195, backward_time=1.054, grad_norm=87.112, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.181, optim0_lr0=1.268e-04, train_time=3.166
+[gpub001:0/64] 2023-07-04 02:17:40,732 (trainer:732) INFO: 9epoch:train:7801-7900batch: iter_time=1.189e-04, forward_time=0.144, loss_ctc=81.461, loss_att=60.824, acc=0.683, loss=67.015, backward_time=1.088, grad_norm=88.042, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.181, optim0_lr0=1.267e-04, train_time=3.736
+[gpub001:0/64] 2023-07-04 02:20:31,309 (trainer:732) INFO: 9epoch:train:7901-8000batch: iter_time=1.118e-04, forward_time=0.143, loss_ctc=77.800, loss_att=55.616, acc=0.685, loss=62.271, backward_time=1.068, grad_norm=96.283, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.181, optim0_lr0=1.266e-04, train_time=3.411
+[gpub001:0/64] 2023-07-04 02:20:50,628 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub001:0/64] 2023-07-04 02:21:12,925 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-04 02:21:17,185 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.9", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-04 02:21:17,185 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.9,
+[gpub001:0/64] 2023-07-04 02:21:17,193 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-04 02:27:33,747 (trainer:732) INFO: 9epoch:train:8001-8100batch: iter_time=2.310, forward_time=0.216, loss_ctc=86.868, loss_att=62.477, acc=0.682, loss=69.795, backward_time=1.066, grad_norm=93.604, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.186, optim0_lr0=1.265e-04, train_time=8.448
+[gpub001:0/64] 2023-07-04 02:30:47,915 (trainer:732) INFO: 9epoch:train:8101-8200batch: iter_time=1.102e-04, forward_time=0.144, loss_ctc=72.041, loss_att=54.309, acc=0.663, loss=59.628, backward_time=1.122, grad_norm=85.541, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.265e-04, train_time=3.884
+[gpub001:0/64] 2023-07-04 02:34:23,550 (trainer:732) INFO: 9epoch:train:8201-8300batch: iter_time=1.110e-04, forward_time=0.143, loss_ctc=80.318, loss_att=62.685, acc=0.682, loss=67.975, backward_time=1.191, grad_norm=91.389, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.264e-04, train_time=4.312
+[gpub001:0/64] 2023-07-04 02:38:10,352 (trainer:732) INFO: 9epoch:train:8301-8400batch: iter_time=1.088e-04, forward_time=0.143, loss_ctc=66.182, loss_att=50.205, acc=0.672, loss=54.998, backward_time=1.197, grad_norm=84.213, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.263e-04, train_time=4.536
+[gpub001:0/64] 2023-07-04 02:41:23,634 (trainer:732) INFO: 9epoch:train:8401-8500batch: iter_time=1.048e-04, forward_time=0.144, loss_ctc=80.035, loss_att=64.016, acc=0.668, loss=68.822, backward_time=1.096, grad_norm=82.939, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.262e-04, train_time=3.865
+[gpub001:0/64] 2023-07-04 02:45:08,691 (trainer:732) INFO: 9epoch:train:8501-8600batch: iter_time=1.245e-04, forward_time=0.145, loss_ctc=72.274, loss_att=60.192, acc=0.664, loss=63.816, backward_time=1.123, grad_norm=86.066, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.261e-04, train_time=4.501
+[gpub001:0/64] 2023-07-04 02:48:18,435 (trainer:732) INFO: 9epoch:train:8601-8700batch: iter_time=1.254e-04, forward_time=0.146, loss_ctc=66.300, loss_att=48.163, acc=0.686, loss=53.604, backward_time=1.120, grad_norm=78.350, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.261e-04, train_time=3.795
+[gpub001:0/64] 2023-07-04 02:51:37,339 (trainer:732) INFO: 9epoch:train:8701-8800batch: iter_time=1.333e-04, forward_time=0.146, loss_ctc=80.163, loss_att=63.912, acc=0.678, loss=68.787, backward_time=1.095, grad_norm=86.030, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.260e-04, train_time=3.978
+[gpub001:0/64] 2023-07-04 02:54:31,632 (trainer:732) INFO: 9epoch:train:8801-8900batch: iter_time=1.137e-04, forward_time=0.145, loss_ctc=81.677, loss_att=59.992, acc=0.684, loss=66.498, backward_time=1.097, grad_norm=75.414, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.259e-04, train_time=3.486
+[gpub001:0/64] 2023-07-04 02:57:46,255 (trainer:732) INFO: 9epoch:train:8901-9000batch: iter_time=1.238e-04, forward_time=0.145, loss_ctc=76.679, loss_att=55.296, acc=0.689, loss=61.711, backward_time=1.165, grad_norm=99.487, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.258e-04, train_time=3.892
+[gpub001:0/64] 2023-07-04 02:58:06,283 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub001:0/64] 2023-07-04 02:58:29,174 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-04 02:58:33,478 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.6", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.6", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.6", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.6", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-04 02:58:33,478 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.6,
+[gpub001:0/64] 2023-07-04 02:58:33,485 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-04 03:05:00,125 (trainer:732) INFO: 9epoch:train:9001-9100batch: iter_time=1.878, forward_time=0.187, loss_ctc=86.043, loss_att=63.578, acc=0.668, loss=70.318, backward_time=1.044, grad_norm=92.448, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.184, optim0_lr0=1.257e-04, train_time=8.677
+[gpub001:0/64] 2023-07-04 03:07:20,404 (trainer:732) INFO: 9epoch:train:9101-9200batch: iter_time=1.129e-04, forward_time=0.144, loss_ctc=73.663, loss_att=55.071, acc=0.654, loss=60.649, backward_time=1.032, grad_norm=88.403, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.257e-04, train_time=2.806
+[gpub001:0/64] 2023-07-04 03:09:37,950 (trainer:732) INFO: 9epoch:train:9201-9300batch: iter_time=1.218e-04, forward_time=0.143, loss_ctc=80.458, loss_att=62.287, acc=0.676, loss=67.738, backward_time=1.027, grad_norm=92.966, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.256e-04, train_time=2.751
+[gpub001:0/64] 2023-07-04 03:11:53,135 (trainer:732) INFO: 9epoch:train:9301-9400batch: iter_time=1.238e-04, forward_time=0.144, loss_ctc=66.752, loss_att=50.047, acc=0.665, loss=55.059, backward_time=1.022, grad_norm=85.205, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.255e-04, train_time=2.703
+[gpub001:0/64] 2023-07-04 03:14:19,601 (trainer:732) INFO: 9epoch:train:9401-9500batch: iter_time=1.238e-04, forward_time=0.144, loss_ctc=80.260, loss_att=63.174, acc=0.665, loss=68.300, backward_time=1.039, grad_norm=86.691, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.254e-04, train_time=2.929
+[gpub001:0/64] 2023-07-04 03:17:07,879 (trainer:732) INFO: 9epoch:train:9501-9600batch: iter_time=1.143e-04, forward_time=0.150, loss_ctc=73.284, loss_att=61.761, acc=0.657, loss=65.218, backward_time=1.062, grad_norm=97.161, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.254e-04, train_time=3.365
+[gpub001:0/64] 2023-07-04 03:19:34,610 (trainer:732) INFO: 9epoch:train:9601-9700batch: iter_time=5.961e-04, forward_time=0.161, loss_ctc=66.042, loss_att=48.731, acc=0.682, loss=53.924, backward_time=1.039, grad_norm=76.636, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.253e-04, train_time=2.934
+[gpub001:0/64] 2023-07-04 03:21:58,061 (trainer:732) INFO: 9epoch:train:9701-9800batch: iter_time=1.166e-04, forward_time=0.175, loss_ctc=78.654, loss_att=65.270, acc=0.669, loss=69.285, backward_time=1.036, grad_norm=89.956, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.184, optim0_lr0=1.252e-04, train_time=2.869
+[gpub001:0/64] 2023-07-04 03:24:35,779 (trainer:732) INFO: 9epoch:train:9801-9900batch: iter_time=1.327e-04, forward_time=0.166, loss_ctc=81.981, loss_att=61.082, acc=0.676, loss=67.352, backward_time=1.044, grad_norm=85.085, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.251e-04, train_time=3.154
+[gpub001:0/64] 2023-07-04 03:27:16,735 (trainer:732) INFO: 9epoch:train:9901-10000batch: iter_time=1.079e-04, forward_time=0.171, loss_ctc=77.652, loss_att=56.185, acc=0.682, loss=62.625, backward_time=1.047, grad_norm=92.946, clip=100.000, loss_scale=3.436e+10, optim_step_time=0.182, optim0_lr0=1.250e-04, train_time=3.219
+[gpub001:0/64] 2023-07-04 03:40:25,652 (trainer:338) INFO: 9epoch results: [train] iter_time=0.199, forward_time=0.157, loss_ctc=77.876, loss_att=59.714, acc=0.667, loss=65.163, backward_time=1.051, grad_norm=90.766, clip=100.000, loss_scale=2.233e+10, optim_step_time=0.182, optim0_lr0=1.292e-04, train_time=3.584, time=4 hours, 59 minutes and 3.18 seconds, total_count=60000, gpu_max_cached_mem_GB=34.164, [valid] loss_ctc=58.837, cer_ctc=0.322, loss_att=48.196, acc=0.608, cer=0.461, wer=0.998, loss=51.388, time=6 minutes and 52.63 seconds, total_count=6578, gpu_max_cached_mem_GB=37.459, [att_plot] time=5 minutes and 53.06 seconds, total_count=0, gpu_max_cached_mem_GB=37.459
+[gpub001:0/64] 2023-07-04 03:40:45,004 (trainer:386) INFO: The best model has been updated: valid.acc, valid.total_count
+[gpub001:0/64] 2023-07-04 03:40:45,009 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/4epoch.pth
+[gpub001:0/64] 2023-07-04 03:40:45,077 (trainer:272) INFO: 10/100epoch started. Estimated time to finish: 2 weeks, 5 days and 17 hours
+[gpub001:0/64] 2023-07-04 03:40:46,338 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub001:0/64] 2023-07-04 03:41:10,884 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-04 03:41:15,265 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.0", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.0", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.0", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.0", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-04 03:41:15,265 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.0,
+[gpub001:0/64] 2023-07-04 03:41:15,312 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-04 03:52:28,581 (trainer:732) INFO: 10epoch:train:1-100batch: iter_time=5.563, forward_time=0.206, loss_ctc=74.661, loss_att=61.707, acc=0.668, loss=65.594, backward_time=1.045, grad_norm=92.048, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.187, optim0_lr0=1.250e-04, train_time=14.055
+[gpub001:0/64] 2023-07-04 03:54:52,229 (trainer:732) INFO: 10epoch:train:101-200batch: iter_time=1.120e-04, forward_time=0.145, loss_ctc=79.571, loss_att=57.713, acc=0.649, loss=64.270, backward_time=1.039, grad_norm=111.566, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.183, optim0_lr0=1.249e-04, train_time=2.875
+[gpub001:0/64] 2023-07-04 03:57:21,049 (trainer:732) INFO: 10epoch:train:201-300batch: iter_time=1.163e-04, forward_time=0.145, loss_ctc=83.086, loss_att=62.207, acc=0.668, loss=68.471, backward_time=1.041, grad_norm=91.324, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.183, optim0_lr0=1.248e-04, train_time=2.976
+[gpub001:0/64] 2023-07-04 04:00:00,041 (trainer:732) INFO: 10epoch:train:301-400batch: iter_time=0.002, forward_time=0.201, loss_ctc=95.823, loss_att=89.716, acc=0.632, loss=91.548, backward_time=1.115, grad_norm=100.695, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.189, optim0_lr0=1.247e-04, train_time=3.179
+[gpub001:0/64] 2023-07-04 04:02:29,261 (trainer:732) INFO: 10epoch:train:401-500batch: iter_time=1.225e-04, forward_time=0.146, loss_ctc=85.114, loss_att=65.608, acc=0.636, loss=71.460, backward_time=1.044, grad_norm=99.338, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.184, optim0_lr0=1.246e-04, train_time=2.984
+[gpub001:0/64] 2023-07-04 04:05:16,066 (trainer:732) INFO: 10epoch:train:501-600batch: iter_time=1.073e-04, forward_time=0.145, loss_ctc=82.793, loss_att=59.421, acc=0.686, loss=66.432, backward_time=1.067, grad_norm=93.603, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.184, optim0_lr0=1.246e-04, train_time=3.336
+[gpub001:0/64] 2023-07-04 04:08:01,476 (trainer:732) INFO: 10epoch:train:601-700batch: iter_time=1.187e-04, forward_time=0.146, loss_ctc=80.618, loss_att=66.338, acc=0.662, loss=70.622, backward_time=1.058, grad_norm=93.226, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.184, optim0_lr0=1.245e-04, train_time=3.308
+[gpub001:0/64] 2023-07-04 04:10:30,382 (trainer:732) INFO: 10epoch:train:701-800batch: iter_time=1.108e-04, forward_time=0.146, loss_ctc=68.601, loss_att=54.680, acc=0.656, loss=58.856, backward_time=1.044, grad_norm=79.552, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.184, optim0_lr0=1.244e-04, train_time=2.978
+[gpub001:0/64] 2023-07-04 04:12:53,237 (trainer:732) INFO: 10epoch:train:801-900batch: iter_time=1.028e-04, forward_time=0.145, loss_ctc=85.022, loss_att=59.685, acc=0.652, loss=67.286, backward_time=1.036, grad_norm=92.734, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.184, optim0_lr0=1.243e-04, train_time=2.857
+[gpub001:0/64] 2023-07-04 04:15:35,870 (trainer:732) INFO: 10epoch:train:901-1000batch: iter_time=1.063e-04, forward_time=0.145, loss_ctc=78.810, loss_att=62.834, acc=0.664, loss=67.627, backward_time=1.081, grad_norm=81.075, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.184, optim0_lr0=1.243e-04, train_time=3.252
+[gpub001:0/64] 2023-07-04 04:15:49,814 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpub001:0/64] 2023-07-04 04:16:11,858 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-04 04:16:16,315 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.6", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.6", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.6", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.6", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-04 04:16:16,315 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.6,
+[gpub001:0/64] 2023-07-04 04:16:16,322 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-04 04:22:44,047 (trainer:732) INFO: 10epoch:train:1001-1100batch: iter_time=2.290, forward_time=0.192, loss_ctc=71.839, loss_att=58.508, acc=0.676, loss=62.507, backward_time=1.050, grad_norm=82.985, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.185, optim0_lr0=1.242e-04, train_time=8.563
+[gpub001:0/64] 2023-07-04 04:25:13,405 (trainer:732) INFO: 10epoch:train:1101-1200batch: iter_time=1.072e-04, forward_time=0.144, loss_ctc=77.842, loss_att=57.499, acc=0.651, loss=63.602, backward_time=1.049, grad_norm=92.682, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.183, optim0_lr0=1.241e-04, train_time=2.987
+[gpub001:0/64] 2023-07-04 04:28:16,167 (trainer:732) INFO: 10epoch:train:1201-1300batch: iter_time=1.018e-04, forward_time=0.146, loss_ctc=79.271, loss_att=59.359, acc=0.677, loss=65.332, backward_time=1.078, grad_norm=81.278, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.184, optim0_lr0=1.240e-04, train_time=3.655
+[gpub001:0/64] 2023-07-04 04:30:50,171 (trainer:732) INFO: 10epoch:train:1301-1400batch: iter_time=1.297e-04, forward_time=0.145, loss_ctc=93.983, loss_att=86.915, acc=0.636, loss=89.035, backward_time=1.054, grad_norm=96.846, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.183, optim0_lr0=1.240e-04, train_time=3.080
+[gpub001:0/64] 2023-07-04 04:33:24,726 (trainer:732) INFO: 10epoch:train:1401-1500batch: iter_time=1.176e-04, forward_time=0.145, loss_ctc=82.254, loss_att=62.424, acc=0.642, loss=68.373, backward_time=1.061, grad_norm=95.559, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.183, optim0_lr0=1.239e-04, train_time=3.091
+[gpub001:0/64] 2023-07-04 04:36:06,544 (trainer:732) INFO: 10epoch:train:1501-1600batch: iter_time=1.104e-04, forward_time=0.143, loss_ctc=83.817, loss_att=59.295, acc=0.686, loss=66.652, backward_time=1.055, grad_norm=85.857, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.183, optim0_lr0=1.238e-04, train_time=3.236
+[gpub001:0/64] 2023-07-04 04:38:46,522 (trainer:732) INFO: 10epoch:train:1601-1700batch: iter_time=9.965e-05, forward_time=0.145, loss_ctc=80.193, loss_att=65.391, acc=0.664, loss=69.831, backward_time=1.061, grad_norm=85.978, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.183, optim0_lr0=1.237e-04, train_time=3.199
+[gpub001:0/64] 2023-07-04 04:41:26,350 (trainer:732) INFO: 10epoch:train:1701-1800batch: iter_time=1.123e-04, forward_time=0.144, loss_ctc=70.011, loss_att=53.793, acc=0.662, loss=58.658, backward_time=1.055, grad_norm=85.885, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.183, optim0_lr0=1.237e-04, train_time=3.196
+[gpub001:0/64] 2023-07-04 04:44:09,017 (trainer:732) INFO: 10epoch:train:1801-1900batch: iter_time=2.201e-04, forward_time=0.181, loss_ctc=83.456, loss_att=59.210, acc=0.658, loss=66.484, backward_time=1.066, grad_norm=92.746, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.187, optim0_lr0=1.236e-04, train_time=3.253
+[gpub001:0/64] 2023-07-04 04:46:37,714 (trainer:732) INFO: 10epoch:train:1901-2000batch: iter_time=1.199e-04, forward_time=0.183, loss_ctc=77.646, loss_att=63.155, acc=0.668, loss=67.502, backward_time=1.048, grad_norm=81.185, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.186, optim0_lr0=1.235e-04, train_time=2.974
+[gpub001:0/64] 2023-07-04 04:46:55,557 (multiple_iter_factory:32) INFO: Building 2th iter-factory...
+[gpub001:0/64] 2023-07-04 04:47:17,723 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-04 04:47:22,197 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.7", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.7", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.7", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.7", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-04 04:47:22,197 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.7,
+[gpub001:0/64] 2023-07-04 04:47:22,204 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-04 04:52:58,643 (trainer:732) INFO: 10epoch:train:2001-2100batch: iter_time=2.218, forward_time=0.151, loss_ctc=71.055, loss_att=59.286, acc=0.684, loss=62.817, backward_time=1.049, grad_norm=84.270, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.184, optim0_lr0=1.234e-04, train_time=7.619
+[gpub001:0/64] 2023-07-04 04:55:14,620 (trainer:732) INFO: 10epoch:train:2101-2200batch: iter_time=1.359e-04, forward_time=0.145, loss_ctc=77.590, loss_att=57.580, acc=0.661, loss=63.583, backward_time=1.027, grad_norm=85.515, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.184, optim0_lr0=1.234e-04, train_time=2.719
+[gpub001:0/64] 2023-07-04 04:57:30,708 (trainer:732) INFO: 10epoch:train:2201-2300batch: iter_time=1.257e-04, forward_time=0.147, loss_ctc=80.305, loss_att=59.243, acc=0.682, loss=65.561, backward_time=1.029, grad_norm=96.055, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.184, optim0_lr0=1.233e-04, train_time=2.722
+[gpub001:0/64] 2023-07-04 05:00:02,444 (trainer:732) INFO: 10epoch:train:2301-2400batch: iter_time=1.220e-04, forward_time=0.148, loss_ctc=91.760, loss_att=85.779, acc=0.656, loss=87.573, backward_time=1.057, grad_norm=104.082, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.184, optim0_lr0=1.232e-04, train_time=3.034
+[gpub001:0/64] 2023-07-04 05:02:21,395 (trainer:732) INFO: 10epoch:train:2401-2500batch: iter_time=1.104e-04, forward_time=0.145, loss_ctc=82.408, loss_att=62.987, acc=0.655, loss=68.813, backward_time=1.033, grad_norm=110.637, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.184, optim0_lr0=1.231e-04, train_time=2.779
+[gpub001:0/64] 2023-07-04 05:04:43,190 (trainer:732) INFO: 10epoch:train:2501-2600batch: iter_time=1.250e-04, forward_time=0.146, loss_ctc=81.831, loss_att=58.991, acc=0.696, loss=65.843, backward_time=1.038, grad_norm=132.918, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.184, optim0_lr0=1.231e-04, train_time=2.836
+[gpub001:0/64] 2023-07-04 05:07:37,477 (trainer:732) INFO: 10epoch:train:2601-2700batch: iter_time=1.360e-04, forward_time=0.146, loss_ctc=79.920, loss_att=63.924, acc=0.673, loss=68.723, backward_time=1.119, grad_norm=92.002, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.183, optim0_lr0=1.230e-04, train_time=3.486
+[gpub001:0/64] 2023-07-04 05:10:26,668 (trainer:732) INFO: 10epoch:train:2701-2800batch: iter_time=1.242e-04, forward_time=0.145, loss_ctc=68.184, loss_att=53.646, acc=0.669, loss=58.007, backward_time=1.089, grad_norm=101.305, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.184, optim0_lr0=1.229e-04, train_time=3.384
+[gpub001:0/64] 2023-07-04 05:13:12,189 (trainer:732) INFO: 10epoch:train:2801-2900batch: iter_time=1.254e-04, forward_time=0.147, loss_ctc=82.436, loss_att=57.796, acc=0.672, loss=65.188, backward_time=1.060, grad_norm=88.713, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.184, optim0_lr0=1.228e-04, train_time=3.310
+[gpub001:0/64] 2023-07-04 05:15:42,484 (trainer:732) INFO: 10epoch:train:2901-3000batch: iter_time=1.035e-04, forward_time=0.146, loss_ctc=77.301, loss_att=60.873, acc=0.678, loss=65.801, backward_time=1.049, grad_norm=93.069, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.184, optim0_lr0=1.228e-04, train_time=3.006
+[gpub001:0/64] 2023-07-04 05:15:44,046 (multiple_iter_factory:32) INFO: Building 3th iter-factory...
+[gpub001:0/64] 2023-07-04 05:16:06,754 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-04 05:16:11,272 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.8", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.8", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.8", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.8", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-04 05:16:11,272 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.8,
+[gpub001:0/64] 2023-07-04 05:16:11,280 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-04 05:23:59,629 (trainer:732) INFO: 10epoch:train:3001-3100batch: iter_time=1.599, forward_time=0.223, loss_ctc=72.196, loss_att=59.196, acc=0.672, loss=63.096, backward_time=1.053, grad_norm=81.648, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.187, optim0_lr0=1.227e-04, train_time=9.943
+[gpub001:0/64] 2023-07-04 05:26:40,677 (trainer:732) INFO: 10epoch:train:3101-3200batch: iter_time=1.388e-04, forward_time=0.145, loss_ctc=75.881, loss_att=56.536, acc=0.656, loss=62.340, backward_time=1.067, grad_norm=82.455, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.184, optim0_lr0=1.226e-04, train_time=3.221
+[gpub001:0/64] 2023-07-04 05:29:15,619 (trainer:732) INFO: 10epoch:train:3201-3300batch: iter_time=1.326e-04, forward_time=0.147, loss_ctc=79.697, loss_att=58.130, acc=0.679, loss=64.600, backward_time=1.053, grad_norm=94.457, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.183, optim0_lr0=1.225e-04, train_time=3.099
+[gpub001:0/64] 2023-07-04 05:31:55,579 (trainer:732) INFO: 10epoch:train:3301-3400batch: iter_time=1.271e-04, forward_time=0.146, loss_ctc=91.505, loss_att=85.159, acc=0.645, loss=87.063, backward_time=1.071, grad_norm=111.923, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.183, optim0_lr0=1.225e-04, train_time=3.199
+[gpub001:0/64] 2023-07-04 05:35:09,355 (trainer:732) INFO: 10epoch:train:3401-3500batch: iter_time=1.234e-04, forward_time=0.146, loss_ctc=80.885, loss_att=61.538, acc=0.648, loss=67.342, backward_time=1.126, grad_norm=93.020, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.183, optim0_lr0=1.224e-04, train_time=3.875
+[gpub001:0/64] 2023-07-04 05:38:07,869 (trainer:732) INFO: 10epoch:train:3501-3600batch: iter_time=1.136e-04, forward_time=0.147, loss_ctc=79.992, loss_att=57.211, acc=0.695, loss=64.045, backward_time=1.084, grad_norm=91.127, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.183, optim0_lr0=1.223e-04, train_time=3.570
+[gpub001:0/64] 2023-07-04 05:41:06,528 (trainer:732) INFO: 10epoch:train:3601-3700batch: iter_time=1.135e-04, forward_time=0.145, loss_ctc=78.588, loss_att=63.951, acc=0.669, loss=68.342, backward_time=1.080, grad_norm=95.189, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.184, optim0_lr0=1.222e-04, train_time=3.573
+[gpub001:0/64] 2023-07-04 05:43:41,455 (trainer:732) INFO: 10epoch:train:3701-3800batch: iter_time=1.262e-04, forward_time=0.146, loss_ctc=67.525, loss_att=52.975, acc=0.664, loss=57.340, backward_time=1.047, grad_norm=79.137, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.183, optim0_lr0=1.222e-04, train_time=3.098
+[gpub001:0/64] 2023-07-04 05:46:38,398 (trainer:732) INFO: 10epoch:train:3801-3900batch: iter_time=1.189e-04, forward_time=0.146, loss_ctc=83.184, loss_att=57.702, acc=0.661, loss=65.347, backward_time=1.081, grad_norm=85.750, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.184, optim0_lr0=1.221e-04, train_time=3.539
+[gpub001:0/64] 2023-07-04 05:49:41,596 (trainer:732) INFO: 10epoch:train:3901-4000batch: iter_time=7.671e-04, forward_time=0.232, loss_ctc=78.322, loss_att=62.743, acc=0.672, loss=67.416, backward_time=1.090, grad_norm=78.715, clip=100.000, loss_scale=6.872e+10, optim_step_time=0.189, optim0_lr0=1.220e-04, train_time=3.664
+[gpub001:0/64] 2023-07-04 05:49:54,817 (multiple_iter_factory:32) INFO: Building 4th iter-factory...
+[gpub001:0/64] 2023-07-04 05:50:17,125 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-04 05:50:21,327 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.9", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.9", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.9", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.9", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-04 05:50:21,327 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.9,
+[gpub001:0/64] 2023-07-04 05:50:21,337 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-04 05:57:30,927 (trainer:732) INFO: 10epoch:train:4001-4100batch: iter_time=2.608, forward_time=0.189, loss_ctc=70.837, loss_att=58.363, acc=0.687, loss=62.105, backward_time=1.088, grad_norm=80.611, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.186, optim0_lr0=1.219e-04, train_time=9.386
+[gpub001:0/64] 2023-07-04 05:59:46,877 (trainer:732) INFO: 10epoch:train:4101-4200batch: iter_time=1.047e-04, forward_time=0.146, loss_ctc=76.833, loss_att=56.960, acc=0.666, loss=62.922, backward_time=1.029, grad_norm=98.324, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.184, optim0_lr0=1.219e-04, train_time=2.719
+[gpub001:0/64] 2023-07-04 06:02:08,031 (trainer:732) INFO: 10epoch:train:4201-4300batch: iter_time=1.282e-04, forward_time=0.149, loss_ctc=77.560, loss_att=57.872, acc=0.688, loss=63.779, backward_time=1.038, grad_norm=85.425, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.184, optim0_lr0=1.218e-04, train_time=2.823
+[gpub001:0/64] 2023-07-04 06:04:26,653 (trainer:732) INFO: 10epoch:train:4301-4400batch: iter_time=1.141e-04, forward_time=0.147, loss_ctc=92.369, loss_att=84.411, acc=0.657, loss=86.799, backward_time=1.034, grad_norm=104.688, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.183, optim0_lr0=1.217e-04, train_time=2.772
+[gpub001:0/64] 2023-07-04 06:06:42,171 (trainer:732) INFO: 10epoch:train:4401-4500batch: iter_time=1.068e-04, forward_time=0.146, loss_ctc=80.486, loss_att=61.536, acc=0.660, loss=67.221, backward_time=1.025, grad_norm=95.744, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.183, optim0_lr0=1.217e-04, train_time=2.710
+[gpub001:0/64] 2023-07-04 06:08:58,348 (trainer:732) INFO: 10epoch:train:4501-4600batch: iter_time=1.054e-04, forward_time=0.147, loss_ctc=80.334, loss_att=58.029, acc=0.703, loss=64.720, backward_time=1.031, grad_norm=100.954, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.183, optim0_lr0=1.216e-04, train_time=2.723
+[gpub001:0/64] 2023-07-04 06:11:28,165 (trainer:732) INFO: 10epoch:train:4601-4700batch: iter_time=1.078e-04, forward_time=0.146, loss_ctc=77.401, loss_att=63.987, acc=0.673, loss=68.011, backward_time=1.064, grad_norm=87.364, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.183, optim0_lr0=1.215e-04, train_time=2.996
+[gpub001:0/64] 2023-07-04 06:14:13,116 (trainer:732) INFO: 10epoch:train:4701-4800batch: iter_time=1.077e-04, forward_time=0.147, loss_ctc=66.979, loss_att=52.793, acc=0.677, loss=57.049, backward_time=1.061, grad_norm=71.593, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.183, optim0_lr0=1.214e-04, train_time=3.299
+[gpub001:0/64] 2023-07-04 06:16:43,533 (trainer:732) INFO: 10epoch:train:4801-4900batch: iter_time=1.076e-04, forward_time=0.162, loss_ctc=83.390, loss_att=59.533, acc=0.672, loss=66.690, backward_time=1.046, grad_norm=101.548, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.184, optim0_lr0=1.214e-04, train_time=3.008
+[gpub001:0/64] 2023-07-04 06:19:33,266 (trainer:732) INFO: 10epoch:train:4901-5000batch: iter_time=5.750e-04, forward_time=0.213, loss_ctc=76.339, loss_att=59.292, acc=0.682, loss=64.406, backward_time=1.106, grad_norm=94.146, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.187, optim0_lr0=1.213e-04, train_time=3.394
+[gpub001:0/64] 2023-07-04 06:19:53,288 (multiple_iter_factory:32) INFO: Building 5th iter-factory...
+[gpub001:0/64] 2023-07-04 06:20:15,638 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-04 06:20:19,922 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.2", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.2", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.2", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.2", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-04 06:20:19,922 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.2,
+[gpub001:0/64] 2023-07-04 06:20:19,929 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-04 06:27:20,326 (trainer:732) INFO: 10epoch:train:5001-5100batch: iter_time=2.350, forward_time=0.170, loss_ctc=71.177, loss_att=57.991, acc=0.682, loss=61.947, backward_time=1.051, grad_norm=82.378, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.185, optim0_lr0=1.212e-04, train_time=9.341
+[gpub001:0/64] 2023-07-04 06:29:40,314 (trainer:732) INFO: 10epoch:train:5101-5200batch: iter_time=1.100e-04, forward_time=0.146, loss_ctc=75.335, loss_att=55.900, acc=0.661, loss=61.731, backward_time=1.032, grad_norm=90.481, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.183, optim0_lr0=1.212e-04, train_time=2.800
+[gpub001:0/64] 2023-07-04 06:31:57,088 (trainer:732) INFO: 10epoch:train:5201-5300batch: iter_time=1.165e-04, forward_time=0.146, loss_ctc=79.123, loss_att=58.087, acc=0.682, loss=64.398, backward_time=1.031, grad_norm=88.409, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.183, optim0_lr0=1.211e-04, train_time=2.735
+[gpub001:0/64] 2023-07-04 06:34:23,229 (trainer:732) INFO: 10epoch:train:5301-5400batch: iter_time=1.130e-04, forward_time=0.147, loss_ctc=90.993, loss_att=83.458, acc=0.647, loss=85.719, backward_time=1.042, grad_norm=94.734, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.183, optim0_lr0=1.210e-04, train_time=2.923
+[gpub001:0/64] 2023-07-04 06:36:38,714 (trainer:732) INFO: 10epoch:train:5401-5500batch: iter_time=1.152e-04, forward_time=0.145, loss_ctc=80.319, loss_att=60.819, acc=0.653, loss=66.669, backward_time=1.025, grad_norm=93.349, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.183, optim0_lr0=1.209e-04, train_time=2.709
+[gpub001:0/64] 2023-07-04 06:38:58,311 (trainer:732) INFO: 10epoch:train:5501-5600batch: iter_time=1.103e-04, forward_time=0.145, loss_ctc=79.823, loss_att=56.920, acc=0.697, loss=63.791, backward_time=1.031, grad_norm=86.237, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.183, optim0_lr0=1.209e-04, train_time=2.792
+[gpub001:0/64] 2023-07-04 06:41:19,825 (trainer:732) INFO: 10epoch:train:5601-5700batch: iter_time=1.113e-04, forward_time=0.144, loss_ctc=79.289, loss_att=64.586, acc=0.666, loss=68.997, backward_time=1.035, grad_norm=117.701, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.183, optim0_lr0=1.208e-04, train_time=2.830
+[gpub001:0/64] 2023-07-04 06:43:36,922 (trainer:732) INFO: 10epoch:train:5701-5800batch: iter_time=1.127e-04, forward_time=0.146, loss_ctc=65.745, loss_att=51.767, acc=0.674, loss=55.961, backward_time=1.030, grad_norm=84.338, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.183, optim0_lr0=1.207e-04, train_time=2.742
+[gpub001:0/64] 2023-07-04 06:46:10,640 (trainer:732) INFO: 10epoch:train:5801-5900batch: iter_time=5.857e-04, forward_time=0.153, loss_ctc=83.685, loss_att=57.586, acc=0.660, loss=65.415, backward_time=1.047, grad_norm=90.102, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.183, optim0_lr0=1.207e-04, train_time=3.074
+[gpub001:0/64] 2023-07-04 06:48:48,486 (trainer:732) INFO: 10epoch:train:5901-6000batch: iter_time=1.104e-04, forward_time=0.190, loss_ctc=76.571, loss_att=62.067, acc=0.674, loss=66.418, backward_time=1.059, grad_norm=83.226, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.184, optim0_lr0=1.206e-04, train_time=3.157
+[gpub001:0/64] 2023-07-04 06:48:54,345 (multiple_iter_factory:32) INFO: Building 6th iter-factory...
+[gpub001:0/64] 2023-07-04 06:49:17,118 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-04 06:49:21,432 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.4", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.4", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.4", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.4", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-04 06:49:21,432 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.4,
+[gpub001:0/64] 2023-07-04 06:49:21,440 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-04 06:54:57,248 (trainer:732) INFO: 10epoch:train:6001-6100batch: iter_time=1.887, forward_time=0.176, loss_ctc=72.668, loss_att=58.875, acc=0.681, loss=63.013, backward_time=1.047, grad_norm=78.204, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.185, optim0_lr0=1.205e-04, train_time=7.374
+[gpub001:0/64] 2023-07-04 06:57:13,494 (trainer:732) INFO: 10epoch:train:6101-6200batch: iter_time=1.222e-04, forward_time=0.147, loss_ctc=75.100, loss_att=56.125, acc=0.660, loss=61.817, backward_time=1.028, grad_norm=96.151, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.184, optim0_lr0=1.205e-04, train_time=2.726
+[gpub001:0/64] 2023-07-04 06:59:29,483 (trainer:732) INFO: 10epoch:train:6201-6300batch: iter_time=1.260e-04, forward_time=0.149, loss_ctc=78.803, loss_att=58.253, acc=0.683, loss=64.418, backward_time=1.028, grad_norm=92.452, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.183, optim0_lr0=1.204e-04, train_time=2.720
+[gpub001:0/64] 2023-07-04 07:01:57,947 (trainer:732) INFO: 10epoch:train:6301-6400batch: iter_time=1.262e-04, forward_time=0.166, loss_ctc=92.014, loss_att=84.009, acc=0.648, loss=86.411, backward_time=1.045, grad_norm=99.864, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.185, optim0_lr0=1.203e-04, train_time=2.969
+[gpub001:0/64] 2023-07-04 07:04:34,258 (trainer:732) INFO: 10epoch:train:6401-6500batch: iter_time=1.256e-04, forward_time=0.156, loss_ctc=78.738, loss_att=59.782, acc=0.657, loss=65.469, backward_time=1.119, grad_norm=101.854, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.185, optim0_lr0=1.202e-04, train_time=3.126
+[gpub001:0/64] 2023-07-04 07:06:55,517 (trainer:732) INFO: 10epoch:train:6501-6600batch: iter_time=1.241e-04, forward_time=0.168, loss_ctc=79.093, loss_att=56.679, acc=0.698, loss=63.403, backward_time=1.032, grad_norm=82.095, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.184, optim0_lr0=1.202e-04, train_time=2.825
+[gpub001:0/64] 2023-07-04 07:09:19,221 (trainer:732) INFO: 10epoch:train:6601-6700batch: iter_time=1.178e-04, forward_time=0.177, loss_ctc=77.971, loss_att=64.064, acc=0.671, loss=68.236, backward_time=1.057, grad_norm=85.100, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.184, optim0_lr0=1.201e-04, train_time=2.874
+[gpub001:0/64] 2023-07-04 07:11:39,694 (trainer:732) INFO: 10epoch:train:6701-6800batch: iter_time=1.091e-04, forward_time=0.146, loss_ctc=66.743, loss_att=52.391, acc=0.671, loss=56.697, backward_time=1.034, grad_norm=88.159, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.184, optim0_lr0=1.200e-04, train_time=2.809
+[gpub001:0/64] 2023-07-04 07:14:01,640 (trainer:732) INFO: 10epoch:train:6801-6900batch: iter_time=1.103e-04, forward_time=0.147, loss_ctc=82.598, loss_att=57.601, acc=0.667, loss=65.100, backward_time=1.035, grad_norm=88.853, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.183, optim0_lr0=1.200e-04, train_time=2.839
+[gpub001:0/64] 2023-07-04 07:16:24,881 (trainer:732) INFO: 10epoch:train:6901-7000batch: iter_time=1.054e-04, forward_time=0.147, loss_ctc=74.921, loss_att=60.937, acc=0.676, loss=65.132, backward_time=1.031, grad_norm=86.997, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.184, optim0_lr0=1.199e-04, train_time=2.865
+[gpub001:0/64] 2023-07-04 07:16:37,979 (multiple_iter_factory:32) INFO: Building 7th iter-factory...
+[gpub001:0/64] 2023-07-04 07:17:00,253 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-04 07:17:04,527 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.5", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.5", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.5", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.5", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-04 07:17:04,527 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.5,
+[gpub001:0/64] 2023-07-04 07:17:04,630 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-04 07:25:43,021 (trainer:732) INFO: 10epoch:train:7001-7100batch: iter_time=2.361, forward_time=0.227, loss_ctc=70.630, loss_att=58.023, acc=0.692, loss=61.805, backward_time=1.046, grad_norm=83.473, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.188, optim0_lr0=1.198e-04, train_time=11.161
+[gpub001:0/64] 2023-07-04 07:27:58,795 (trainer:732) INFO: 10epoch:train:7101-7200batch: iter_time=1.384e-04, forward_time=0.145, loss_ctc=74.050, loss_att=55.642, acc=0.672, loss=61.165, backward_time=1.026, grad_norm=90.165, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.183, optim0_lr0=1.198e-04, train_time=2.717
+[gpub001:0/64] 2023-07-04 07:30:21,144 (trainer:732) INFO: 10epoch:train:7201-7300batch: iter_time=1.130e-04, forward_time=0.146, loss_ctc=77.786, loss_att=57.538, acc=0.689, loss=63.612, backward_time=1.035, grad_norm=81.644, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.183, optim0_lr0=1.197e-04, train_time=2.847
+[gpub001:0/64] 2023-07-04 07:32:41,864 (trainer:732) INFO: 10epoch:train:7301-7400batch: iter_time=9.930e-05, forward_time=0.147, loss_ctc=90.070, loss_att=83.807, acc=0.661, loss=85.686, backward_time=1.045, grad_norm=98.326, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.183, optim0_lr0=1.196e-04, train_time=2.814
+[gpub001:0/64] 2023-07-04 07:35:08,429 (trainer:732) INFO: 10epoch:train:7401-7500batch: iter_time=1.019e-04, forward_time=0.145, loss_ctc=79.856, loss_att=61.766, acc=0.661, loss=67.193, backward_time=1.040, grad_norm=89.441, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.183, optim0_lr0=1.196e-04, train_time=2.931
+[gpub001:0/64] 2023-07-04 07:37:26,655 (trainer:732) INFO: 10epoch:train:7501-7600batch: iter_time=1.190e-04, forward_time=0.145, loss_ctc=81.266, loss_att=58.514, acc=0.702, loss=65.340, backward_time=1.030, grad_norm=92.735, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.183, optim0_lr0=1.195e-04, train_time=2.764
+[gpub001:0/64] 2023-07-04 07:40:02,165 (trainer:732) INFO: 10epoch:train:7601-7700batch: iter_time=5.351e-04, forward_time=0.147, loss_ctc=76.333, loss_att=62.310, acc=0.677, loss=66.517, backward_time=1.056, grad_norm=79.911, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.183, optim0_lr0=1.194e-04, train_time=3.110
+[gpub001:0/64] 2023-07-04 07:42:23,167 (trainer:732) INFO: 10epoch:train:7701-7800batch: iter_time=9.641e-05, forward_time=0.145, loss_ctc=66.356, loss_att=52.050, acc=0.681, loss=56.342, backward_time=1.039, grad_norm=73.483, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.183, optim0_lr0=1.194e-04, train_time=2.820
+[gpub001:0/64] 2023-07-04 07:44:57,556 (trainer:732) INFO: 10epoch:train:7801-7900batch: iter_time=9.989e-05, forward_time=0.155, loss_ctc=81.235, loss_att=57.248, acc=0.677, loss=64.444, backward_time=1.065, grad_norm=86.881, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.184, optim0_lr0=1.193e-04, train_time=3.088
+[gpub001:0/64] 2023-07-04 07:48:09,978 (trainer:732) INFO: 10epoch:train:7901-8000batch: iter_time=1.022e-04, forward_time=0.170, loss_ctc=76.555, loss_att=60.229, acc=0.685, loss=65.127, backward_time=1.097, grad_norm=79.075, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.184, optim0_lr0=1.192e-04, train_time=3.848
+[gpub001:0/64] 2023-07-04 07:48:25,594 (multiple_iter_factory:32) INFO: Building 8th iter-factory...
+[gpub001:0/64] 2023-07-04 07:48:48,049 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-04 07:48:52,289 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.1", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.1", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.1", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.1", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-04 07:48:52,289 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.1,
+[gpub001:0/64] 2023-07-04 07:48:52,297 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-04 07:55:59,186 (trainer:732) INFO: 10epoch:train:8001-8100batch: iter_time=2.602, forward_time=0.191, loss_ctc=70.351, loss_att=57.246, acc=0.695, loss=61.178, backward_time=1.076, grad_norm=77.703, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.185, optim0_lr0=1.191e-04, train_time=9.384
+[gpub001:0/64] 2023-07-04 07:58:31,433 (trainer:732) INFO: 10epoch:train:8101-8200batch: iter_time=1.315e-04, forward_time=0.146, loss_ctc=74.708, loss_att=54.669, acc=0.672, loss=60.681, backward_time=1.055, grad_norm=103.432, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.191e-04, train_time=3.045
+[gpub001:0/64] 2023-07-04 08:01:03,790 (trainer:732) INFO: 10epoch:train:8201-8300batch: iter_time=1.264e-04, forward_time=0.147, loss_ctc=77.423, loss_att=57.831, acc=0.689, loss=63.709, backward_time=1.050, grad_norm=101.802, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.190e-04, train_time=3.047
+[gpub001:0/64] 2023-07-04 08:03:43,672 (trainer:732) INFO: 10epoch:train:8301-8400batch: iter_time=1.320e-04, forward_time=0.146, loss_ctc=90.631, loss_att=83.753, acc=0.660, loss=85.816, backward_time=1.067, grad_norm=95.528, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.189e-04, train_time=3.197
+[gpub001:0/64] 2023-07-04 08:06:18,852 (trainer:732) INFO: 10epoch:train:8401-8500batch: iter_time=1.261e-04, forward_time=0.147, loss_ctc=78.090, loss_att=59.472, acc=0.668, loss=65.057, backward_time=1.065, grad_norm=97.820, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.189e-04, train_time=3.103
+[gpub001:0/64] 2023-07-04 08:08:44,080 (trainer:732) INFO: 10epoch:train:8501-8600batch: iter_time=1.136e-04, forward_time=0.147, loss_ctc=81.800, loss_att=58.282, acc=0.704, loss=65.338, backward_time=1.043, grad_norm=86.480, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.188e-04, train_time=2.904
+[gpub001:0/64] 2023-07-04 08:11:19,247 (trainer:732) INFO: 10epoch:train:8601-8700batch: iter_time=1.271e-04, forward_time=0.146, loss_ctc=77.659, loss_att=64.031, acc=0.675, loss=68.119, backward_time=1.051, grad_norm=85.748, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.187e-04, train_time=3.103
+[gpub001:0/64] 2023-07-04 08:14:02,057 (trainer:732) INFO: 10epoch:train:8701-8800batch: iter_time=1.326e-04, forward_time=0.146, loss_ctc=66.799, loss_att=52.036, acc=0.681, loss=56.465, backward_time=1.059, grad_norm=73.862, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.187e-04, train_time=3.256
+[gpub001:0/64] 2023-07-04 08:16:37,378 (trainer:732) INFO: 10epoch:train:8801-8900batch: iter_time=1.286e-04, forward_time=0.146, loss_ctc=82.147, loss_att=57.347, acc=0.680, loss=64.787, backward_time=1.052, grad_norm=89.496, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.186e-04, train_time=3.106
+[gpub001:0/64] 2023-07-04 08:19:08,726 (trainer:732) INFO: 10epoch:train:8901-9000batch: iter_time=1.263e-04, forward_time=0.146, loss_ctc=75.952, loss_att=59.955, acc=0.682, loss=64.755, backward_time=1.047, grad_norm=84.576, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.185e-04, train_time=3.027
+[gpub001:0/64] 2023-07-04 08:19:28,754 (multiple_iter_factory:32) INFO: Building 9th iter-factory...
+[gpub001:0/64] 2023-07-04 08:19:51,073 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-04 08:19:55,348 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+ speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.3", "type": "kaldi_ark"}
+ text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.3", "type": "text"}
+ text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.3", "type": "text"}
+ text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.3", "type": "text"}
+ preprocess: )
+[gpub001:0/64] 2023-07-04 08:19:55,348 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.3,
+[gpub001:0/64] 2023-07-04 08:19:55,408 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-04 08:26:15,955 (trainer:732) INFO: 10epoch:train:9001-9100batch: iter_time=2.440, forward_time=0.246, loss_ctc=70.151, loss_att=57.464, acc=0.695, loss=61.270, backward_time=1.053, grad_norm=83.805, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.187, optim0_lr0=1.185e-04, train_time=8.544
+[gpub001:0/64] 2023-07-04 08:28:32,451 (trainer:732) INFO: 10epoch:train:9101-9200batch: iter_time=1.067e-04, forward_time=0.145, loss_ctc=75.009, loss_att=55.262, acc=0.672, loss=61.186, backward_time=1.028, grad_norm=94.917, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.184e-04, train_time=2.730
+[gpub001:0/64] 2023-07-04 08:30:51,931 (trainer:732) INFO: 10epoch:train:9201-9300batch: iter_time=1.173e-04, forward_time=0.148, loss_ctc=77.181, loss_att=56.772, acc=0.694, loss=62.895, backward_time=1.031, grad_norm=83.826, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.184, optim0_lr0=1.183e-04, train_time=2.789
+[gpub001:0/64] 2023-07-04 08:33:11,956 (trainer:732) INFO: 10epoch:train:9301-9400batch: iter_time=1.225e-04, forward_time=0.149, loss_ctc=87.476, loss_att=81.540, acc=0.668, loss=83.321, backward_time=1.037, grad_norm=99.964, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.183e-04, train_time=2.800
+[gpub001:0/64] 2023-07-04 08:35:38,898 (trainer:732) INFO: 10epoch:train:9401-9500batch: iter_time=1.206e-04, forward_time=0.147, loss_ctc=78.958, loss_att=59.832, acc=0.663, loss=65.570, backward_time=1.040, grad_norm=101.896, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.182e-04, train_time=2.939
+[gpub001:0/64] 2023-07-04 08:38:05,571 (trainer:732) INFO: 10epoch:train:9501-9600batch: iter_time=1.192e-04, forward_time=0.147, loss_ctc=79.300, loss_att=57.356, acc=0.708, loss=63.939, backward_time=1.043, grad_norm=85.796, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.181e-04, train_time=2.933
+[gpub001:0/64] 2023-07-04 08:40:51,798 (trainer:732) INFO: 10epoch:train:9601-9700batch: iter_time=1.288e-04, forward_time=0.148, loss_ctc=78.120, loss_att=64.613, acc=0.677, loss=68.665, backward_time=1.077, grad_norm=116.603, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.181e-04, train_time=3.324
+[gpub001:0/64] 2023-07-04 08:43:38,644 (trainer:732) INFO: 10epoch:train:9701-9800batch: iter_time=1.182e-04, forward_time=0.147, loss_ctc=66.807, loss_att=51.983, acc=0.683, loss=56.430, backward_time=1.086, grad_norm=86.590, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.180e-04, train_time=3.337
+[gpub001:0/64] 2023-07-04 08:46:13,955 (trainer:732) INFO: 10epoch:train:9801-9900batch: iter_time=1.012e-04, forward_time=0.147, loss_ctc=81.321, loss_att=58.560, acc=0.675, loss=65.388, backward_time=1.046, grad_norm=87.678, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.179e-04, train_time=3.106
+[gpub001:0/64] 2023-07-04 08:48:56,080 (trainer:732) INFO: 10epoch:train:9901-10000batch: iter_time=1.048e-04, forward_time=0.146, loss_ctc=75.337, loss_att=58.584, acc=0.688, loss=63.610, backward_time=1.058, grad_norm=81.275, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.179e-04, train_time=3.242
+[gpub001:0/64] 2023-07-04 09:01:39,700 (trainer:338) INFO: 10epoch results: [train] iter_time=0.259, forward_time=0.156, loss_ctc=78.576, loss_att=61.451, acc=0.672, loss=66.588, backward_time=1.054, grad_norm=91.032, clip=100.000, loss_scale=1.374e+11, optim_step_time=0.184, optim0_lr0=1.213e-04, train_time=3.698, time=5 hours, 8 minutes and 33.64 seconds, total_count=70000, gpu_max_cached_mem_GB=37.459, [valid] loss_ctc=58.858, cer_ctc=0.319, loss_att=49.021, acc=0.607, cer=0.458, wer=1.000, loss=51.972, time=6 minutes and 30.14 seconds, total_count=7590, gpu_max_cached_mem_GB=37.459, [att_plot] time=5 minutes and 50.7 seconds, total_count=0, gpu_max_cached_mem_GB=37.459
+[gpub001:0/64] 2023-07-04 09:01:59,109 (trainer:386) INFO: The best model has been updated: valid.total_count
+[gpub001:0/64] 2023-07-04 09:02:00,803 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.acc": exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.acc.ave_5best.till10epoch.pth
+[gpub001:0/64] 2023-07-04 09:02:44,194 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.total_count": exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.total_count.ave_5best.till10epoch.pth
+[gpub001:0/64] 2023-07-04 09:02:51,878 (trainer:440) INFO: The model files were removed: exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/5epoch.pth
+[gpub001:0/64] 2023-07-04 09:02:51,945 (trainer:272) INFO: 11/100epoch started. Estimated time to finish: 2 weeks, 5 days and 19 hours
+[gpub001:0/64] 2023-07-04 09:02:53,579 (multiple_iter_factory:32) INFO: Building 0th iter-factory...
+[gpub001:0/64] 2023-07-04 09:03:16,753 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-04 09:03:21,037 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.4", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.4", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.4", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.4", "type": "text"}
+  preprocess: )
+[gpub001:0/64] 2023-07-04 09:03:21,038 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.4,
+[gpub001:0/64] 2023-07-04 09:03:21,282 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+[gpub001:0/64] 2023-07-04 09:11:53,137 (trainer:732) INFO: 11epoch:train:1-100batch: iter_time=3.940, forward_time=0.203, loss_ctc=69.654, loss_att=54.166, acc=0.663, loss=58.813, backward_time=1.045, grad_norm=79.216, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.187, optim0_lr0=1.178e-04, train_time=10.802
+[gpub001:0/64] 2023-07-04 09:14:10,915 (trainer:732) INFO: 11epoch:train:101-200batch: iter_time=1.300e-04, forward_time=0.146, loss_ctc=88.708, loss_att=62.409, acc=0.670, loss=70.299, backward_time=1.030, grad_norm=109.268, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.184, optim0_lr0=1.178e-04, train_time=2.755
+[gpub001:0/64] 2023-07-04 09:16:26,411 (trainer:732) INFO: 11epoch:train:201-300batch: iter_time=1.304e-04, forward_time=0.144, loss_ctc=75.382, loss_att=62.579, acc=0.655, loss=66.420, backward_time=1.027, grad_norm=83.304, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.177e-04, train_time=2.710
+[gpub001:0/64] 2023-07-04 09:18:42,130 (trainer:732) INFO: 11epoch:train:301-400batch: iter_time=1.207e-04, forward_time=0.144, loss_ctc=78.822, loss_att=59.346, acc=0.668, loss=65.189, backward_time=1.026, grad_norm=100.773, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.176e-04, train_time=2.714
+[gpub001:0/64] 2023-07-04 09:21:02,338 (trainer:732) INFO: 11epoch:train:401-500batch: iter_time=1.325e-04, forward_time=0.145, loss_ctc=76.393, loss_att=64.269, acc=0.659, loss=67.906, backward_time=1.036, grad_norm=116.385, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.184, optim0_lr0=1.176e-04, train_time=2.804
+[gpub001:0/64] 2023-07-04 09:23:33,454 (trainer:732) INFO: 11epoch:train:501-600batch: iter_time=1.256e-04, forward_time=0.144, loss_ctc=76.583, loss_att=58.803, acc=0.655, loss=64.137, backward_time=1.050, grad_norm=86.037, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.175e-04, train_time=3.022
+[gpub001:0/64] 2023-07-04 09:26:08,811 (trainer:732) INFO: 11epoch:train:601-700batch: iter_time=1.229e-04, forward_time=0.146, loss_ctc=91.392, loss_att=65.716, acc=0.662, loss=73.419, backward_time=1.075, grad_norm=105.260, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.174e-04, train_time=3.107
+[gpub001:0/64] 2023-07-04 09:28:29,823 (trainer:732) INFO: 11epoch:train:701-800batch: iter_time=1.309e-04, forward_time=0.144, loss_ctc=80.913, loss_att=63.348, acc=0.638, loss=68.618, backward_time=1.034, grad_norm=106.285, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.174e-04, train_time=2.820
+[gpub001:0/64] 2023-07-04 09:31:05,097 (trainer:732) INFO: 11epoch:train:801-900batch: iter_time=3.980e-04, forward_time=0.269, loss_ctc=82.733, loss_att=69.706, acc=0.657, loss=73.614, backward_time=1.058, grad_norm=122.936, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.189, optim0_lr0=1.173e-04, train_time=3.105
+[gpub001:0/64] 2023-07-04 09:33:40,369 (trainer:732) INFO: 11epoch:train:901-1000batch: iter_time=1.140e-04, forward_time=0.148, loss_ctc=67.176, loss_att=55.082, acc=0.663, loss=58.710, backward_time=1.066, grad_norm=79.860, clip=100.000, loss_scale=2.749e+11, optim_step_time=0.183, optim0_lr0=1.172e-04, train_time=3.105
+[gpub001:0/64] 2023-07-04 09:33:57,353 (multiple_iter_factory:32) INFO: Building 1th iter-factory...
+[gpub001:0/64] 2023-07-04 09:34:19,685 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub001:0/64] 2023-07-04 09:34:23,885 (abs_task:1570) INFO: [train] dataset:
+ESPnetDataset(
+  speech: {"path": "exp/s2t_stats_raw_bpe50000/splits10/wav.scp/split.2", "type": "kaldi_ark"}
+  text_prev: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.prev/split.2", "type": "text"}
+  text_ctc: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text.ctc/split.2", "type": "text"}
+  text: {"path": "exp/s2t_stats_raw_bpe50000/splits10/text/split.2", "type": "text"}
+  preprocess: )
+[gpub001:0/64] 2023-07-04 09:34:23,885 (abs_task:1571) INFO: [train] Batch sampler: UnsortedBatchSampler(N-batch=45593, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/splits10/speech_shape/split.2,
+[gpub001:0/64] 2023-07-04 09:34:23,892 (abs_task:1572) INFO: [train] mini-batch sizes summary: N-batch=45593, mean=128.0, min=128, max=129
+Traceback (most recent call last):
+  File "<string>", line 1, in <module>
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/spawn.py", line 116, in spawn_main
+    exitcode = _main(fd, parent_sentinel)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/spawn.py", line 126, in _main
+    self = reduction.pickle.load(from_parent)
+_pickle.UnpicklingError: pickle data was truncated
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 140, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 0 terminated with signal SIGKILL
+Traceback (most recent call last):
+  File "<string>", line 1, in <module>
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/spawn.py", line 116, in spawn_main
+    exitcode = _main(fd, parent_sentinel)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/spawn.py", line 126, in _main
+    self = reduction.pickle.load(from_parent)
+_pickle.UnpicklingError: pickle data was truncated
+Traceback (most recent call last):
+  File "<string>", line 1, in <module>
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/spawn.py", line 116, in spawn_main
+    exitcode = _main(fd, parent_sentinel)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/spawn.py", line 126, in _main
+    self = reduction.pickle.load(from_parent)
+_pickle.UnpicklingError: pickle data was truncated
+Traceback (most recent call last):
+  File "<string>", line 1, in <module>
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/spawn.py", line 116, in spawn_main
+    exitcode = _main(fd, parent_sentinel)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/spawn.py", line 126, in _main
+    self = reduction.pickle.load(from_parent)
+_pickle.UnpicklingError: pickle data was truncated
+slurmstepd: error: Detected 1 oom-kill event(s) in StepId=2121665.0. Some of your processes may have been killed by the cgroup out-of-memory handler.
+srun: error: gpub001: task 0: Out Of Memory
+gpub022:3399535:3399624 [1] NCCL INFO [Service thread] Connection closed by localRank 1
+gpub022:3399536:3399623 [2] NCCL INFO [Service thread] Connection closed by localRank 2
+gpub022:3399537:3399622 [3] NCCL INFO [Service thread] Connection closed by localRank 3
+gpub022:3399534:3399625 [0] NCCL INFO [Service thread] Connection closed by localRank 0
+gpub022:3399536:3399536 [2] NCCL INFO comm 0x93f2210 rank 18 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+[W ProcessGroupNCCL.cpp:948] [Rank 43] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 17. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 42] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 17. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 50] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 17. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 49] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 17. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 40] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 17.
This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 41] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 17. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 48] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 17. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 51] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 17. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +gpub076:3343846:3343926 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub076:3343843:3343928 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub076:3343845:3343927 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub066:1432047:1432134 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub066:1432046:1432136 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub066:1432048:1432137 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub022:3399537:3399537 [3] NCCL INFO comm 0x50214710 rank 19 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub022:3399535:3399535 [1] NCCL INFO comm 0x4fa312f0 rank 17 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub022:3399534:3399534 [0] NCCL INFO comm 0x50711f50 rank 16 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub066:1432048:1432069 [0] NCCL INFO comm 0x51126a70 rank 43 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub066:1432047:1432070 [0] NCCL INFO comm 0x9ed0150 rank 42 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub076:3343845:3343867 [0] NCCL INFO comm 0x4fe2ad90 rank 50 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub076:3343846:3343866 [0] NCCL INFO comm 0x50888c10 rank 51 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub066:1432046:1432068 [0] NCCL INFO comm 0x4fabed20 rank 41 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub076:3343844:3343868 [0] NCCL INFO comm 0xb838ee00 rank 49 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub076:3343843:3343869 [0] NCCL INFO comm 0x508de3f0 rank 48 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub066:1432045:1432071 [0] NCCL INFO comm 0x50653520 rank 40 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +Process SpawnProcess-4: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File 
"/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 51] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 17. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-4: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: [Rank 19] Caught collective operation timeout: WorkNCCL(SeqNum=2076236, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800147 milliseconds before timing out. +Process SpawnProcess-1: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 48] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 17. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 50] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 17. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 49] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 17. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-3: +Process SpawnProcess-1: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: [Rank 18] Caught collective operation timeout: WorkNCCL(SeqNum=2076236, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800146 milliseconds before timing out. +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: [Rank 16] Caught collective operation timeout: WorkNCCL(SeqNum=2076236, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800157 milliseconds before timing out. 
+Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: [Rank 17] Caught collective operation timeout: WorkNCCL(SeqNum=2076236, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800147 milliseconds before timing out. +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 42] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 17. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-4: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 43] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 17. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 41] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 17. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-1: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 40] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 17. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 27] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 46] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 44] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 25] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 47] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 59] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. 
This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 24] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 58] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 57] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 26] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 45] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +gpub031:1878314:1878398 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub031:1878313:1878397 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub079:2616804:2616888 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub079:2616805:2616890 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub067:1390514:1390593 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub067:1390516:1390596 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub067:1390513:1390595 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +[W ProcessGroupNCCL.cpp:948] [Rank 56] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +gpub067:1390516:1390538 [0] NCCL INFO comm 0x509fc1c0 rank 47 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub079:2616806:2616829 [0] NCCL INFO comm 0x89762f0 rank 59 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub031:1878312:1878336 [0] NCCL INFO comm 0x509faf60 rank 25 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub079:2616805:2616827 [0] NCCL INFO comm 0x8b2c9c20 rank 58 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub067:1390514:1390537 [0] NCCL INFO comm 0xa70b75d0 rank 45 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub031:1878313:1878334 [0] NCCL INFO comm 0xa54f400 rank 26 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub067:1390513:1390536 [0] NCCL INFO comm 0x4ef73970 rank 44 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub079:2616804:2616830 [0] NCCL INFO comm 0x9014adc0 rank 57 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub067:1390515:1390535 [0] NCCL INFO comm 0x5030f0d0 rank 46 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub031:1878314:1878335 [0] NCCL INFO comm 0x511daaa0 rank 27 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub079:2616803:2616828 [0] NCCL INFO comm 0xa9779a50 rank 56 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub031:1878311:1878337 [0] NCCL INFO comm 0xba515710 rank 24 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +Process SpawnProcess-4: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 27] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-4: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 59] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 58] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 46] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-1: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 56] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 57] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-1: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 44] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 25] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 26] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 45] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-1: +Process SpawnProcess-4: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 24] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 47] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 16. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 14] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 15] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 13] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 12] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +gpub016:1380823:1380905 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub016:1380823:1380846 [0] NCCL INFO comm 0x517fee10 rank 14 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +[W ProcessGroupNCCL.cpp:948] [Rank 54] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 55] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 52] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 53] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub016:1380824:1380845 [0] NCCL INFO comm 0x8d241cc0 rank 15 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpub016:1380822:1380843 [0] NCCL INFO comm 0x9b8bb7a0 rank 13 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpub077:252894:252972 [2] NCCL INFO [Service thread] Connection closed by localRank 2
+gpub077:252895:252971 [3] NCCL INFO [Service thread] Connection closed by localRank 3
+gpub077:252894:252916 [0] NCCL INFO comm 0xc19a4b40 rank 54 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpub077:252893:252970 [1] NCCL INFO [Service thread] Connection closed by localRank 1
+gpub077:252893:252914 [0] NCCL INFO comm 0x509e6280 rank 53 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpub077:252895:252913 [0] NCCL INFO comm 0x9491900 rank 55 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpub077:252892:252969 [0] NCCL INFO [Service thread] Connection closed by localRank 0
+gpub077:252892:252915 [0] NCCL INFO comm 0x97aafd0 rank 52 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+gpub016:1380821:1380844 [0] NCCL INFO comm 0x50896990 rank 12 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 14] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 54] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 53] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 13] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 55] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 15] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-1:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 52] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 21] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 23] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 22] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub030:2310660:2310736 [3] NCCL INFO [Service thread] Connection closed by localRank 3
+[W ProcessGroupNCCL.cpp:948] [Rank 20] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub030:2310660:2310681 [0] NCCL INFO comm 0xa84d3a10 rank 23 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+Process SpawnProcess-1:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 12] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub030:2310658:2310733 [1] NCCL INFO [Service thread] Connection closed by localRank 1
+gpub030:2310659:2310682 [0] NCCL INFO comm 0x8de12f60 rank 22 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpub030:2310658:2310680 [0] NCCL INFO comm 0x50672d50 rank 21 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+[W ProcessGroupNCCL.cpp:948] [Rank 38] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 39] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 37] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 36] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub060:1938146:1938228 [3] NCCL INFO [Service thread] Connection closed by localRank 3
+gpub060:1938145:1938226 [2] NCCL INFO [Service thread] Connection closed by localRank 2
+gpub060:1938144:1938225 [1] NCCL INFO [Service thread] Connection closed by localRank 1
+gpub060:1938146:1938170 [0] NCCL INFO comm 0x50addeb0 rank 39 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpub060:1938145:1938171 [0] NCCL INFO comm 0xb591e2d0 rank 38 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpub060:1938144:1938172 [0] NCCL INFO comm 0x4f3bc650 rank 37 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpub030:2310657:2310679 [0] NCCL INFO comm 0x50d929d0 rank 20 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+[W ProcessGroupNCCL.cpp:948] [Rank 62] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 63] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub096:1440104:1440184 [3] NCCL INFO [Service thread] Connection closed by localRank 3
+gpub096:1440103:1440185 [2] NCCL INFO [Service thread] Connection closed by localRank 2
+[W ProcessGroupNCCL.cpp:948] [Rank 60] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 61] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub096:1440102:1440186 [1] NCCL INFO [Service thread] Connection closed by localRank 1
+gpub096:1440103:1440125 [0] NCCL INFO comm 0x91c6060 rank 62 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE
+gpub096:1440104:1440126 [0] NCCL INFO comm 0x9f265ce0 rank 63 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+gpub096:1440102:1440128 [0] NCCL INFO comm 0x50d96930 rank 61 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+gpub032:3246893:3246982 [1] NCCL INFO [Service thread] Connection closed by localRank 1
+gpub032:3246893:3246893 [1] NCCL INFO comm 0x9a6ad00 rank 29 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 23] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 21] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 38] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-3:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 22] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub060:1938143:1938169 [0] NCCL INFO comm 0x50561020 rank 36 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+Process SpawnProcess-1:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 20] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub096:1440101:1440127 [0] NCCL INFO comm 0x50b020d0 rank 60 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 39] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 37] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 35] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 33] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-3:
+[W ProcessGroupNCCL.cpp:948] [Rank 32] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 62] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 11] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub059:1894384:1894465 [1] NCCL INFO [Service thread] Connection closed by localRank 1
+gpub059:1894386:1894467 [3] NCCL INFO [Service thread] Connection closed by localRank 3
+[W ProcessGroupNCCL.cpp:948] [Rank 9] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 8] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub059:1894386:1894405 [0] NCCL INFO comm 0x9cf1390 rank 35 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE
+[W ProcessGroupNCCL.cpp:948] [Rank 10] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Process SpawnProcess-4:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 63] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+[W ProcessGroupNCCL.cpp:948] [Rank 34] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c17650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+gpub059:1894385:1894466 [2] NCCL INFO [Service thread] Connection closed by localRank 2
+Process SpawnProcess-2:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: [Rank 29] Caught collective operation timeout: WorkNCCL(SeqNum=2076236, OpType=ALLREDUCE, TensorShape=[], Timeout(ms)=1800000) ran for 1800044 milliseconds before timing out.
+gpub059:1894385:1894403 [0] NCCL INFO comm 0x50af3510 rank 34 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub015:828881:828961 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub015:828878:828959 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub015:828879:828960 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub015:828880:828958 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +[W ProcessGroupNCCL.cpp:948] [Rank 4] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 7] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 5] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +gpub015:828879:828899 [0] NCCL INFO comm 0x8ad4b90 rank 9 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub059:1894384:1894406 [0] NCCL INFO comm 0xb7b49460 rank 33 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 61] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 6] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +gpub015:828880:828901 [0] NCCL INFO comm 0x9e67ed0 rank 10 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub015:828881:828898 [0] NCCL INFO comm 0xb64dad10 rank 11 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub002:1756560:1756644 [1] NCCL INFO [Service thread] Connection closed by localRank 1 +gpub002:1756561:1756645 [2] NCCL INFO [Service thread] Connection closed by localRank 2 +gpub002:1756562:1756643 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub015:828878:828900 [0] NCCL INFO comm 0x8fc63100 rank 8 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +gpub002:1756560:1756584 [0] NCCL INFO comm 0x17829840 rank 5 nranks 64 cudaDev 1 busId 46000 - Abort COMPLETE +gpub002:1756561:1756582 [0] NCCL INFO comm 0x51ad54d0 rank 6 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +gpub002:1756562:1756583 [0] NCCL INFO comm 0x9ca8ab90 rank 7 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub002:1756559:1756646 [0] NCCL INFO [Service thread] Connection closed by localRank 0 +gpub002:1756559:1756581 [0] NCCL INFO comm 0x51930090 rank 4 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +Process SpawnProcess-1: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 36] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-1: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 60] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 18. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +gpub059:1894383:1894404 [0] NCCL INFO comm 0x510467d0 rank 32 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +Process SpawnProcess-4: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 35] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-2: +[W ProcessGroupNCCL.cpp:948] [Rank 30] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 9] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 31] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +[W ProcessGroupNCCL.cpp:948] [Rank 28] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +gpub032:3246895:3246981 [3] NCCL INFO [Service thread] Connection closed by localRank 3 +gpub032:3246895:3246917 [0] NCCL INFO comm 0x1b5e5670 rank 31 nranks 64 cudaDev 3 busId c7000 - Abort COMPLETE +gpub032:3246894:3246916 [0] NCCL INFO comm 0x9ddee7e0 rank 30 nranks 64 cudaDev 2 busId 85000 - Abort COMPLETE +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 5] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 10] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-4: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 7] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-2: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 33] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 6] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-1: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 8] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-4: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 11] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-1: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 4] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 567, in train_one_epoch + retval = model(**batch) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl + return forward_call(*input, **kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1034, in forward + self._sync_buffers() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1621, in _sync_buffers + self._sync_module_buffers(authoritative_rank) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1625, in _sync_module_buffers + self._default_broadcast_coalesced(authoritative_rank=authoritative_rank) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1646, in _default_broadcast_coalesced + self._distributed_broadcast_coalesced( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1562, in _distributed_broadcast_coalesced + dist._broadcast_coalesced( +RuntimeError: NCCL communicator was aborted on rank 34. Original reason for failure was: [Rank 34] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators +Process SpawnProcess-1: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch + torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce + work.wait() +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 32] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +gpub032:3246892:3246918 [0] NCCL INFO comm 0x4ff3dba0 rank 28 nranks 64 cudaDev 0 busId 7000 - Abort COMPLETE +Process SpawnProcess-4: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 567, in train_one_epoch + retval = model(**batch) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl + return forward_call(*input, **kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1034, in forward + self._sync_buffers() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1621, in _sync_buffers + self._sync_module_buffers(authoritative_rank) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1625, in _sync_module_buffers + self._default_broadcast_coalesced(authoritative_rank=authoritative_rank) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1646, in _default_broadcast_coalesced + self._distributed_broadcast_coalesced( + File 
"/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1562, in _distributed_broadcast_coalesced + dist._broadcast_coalesced( +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 31] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators +Process SpawnProcess-3: +Traceback (most recent call last): + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap + self.run() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run + self._target(*self._args, **self._kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker + cls.trainer.run( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run + all_steps_are_invalid = cls.train_one_epoch( + File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 567, in train_one_epoch + retval = model(**batch) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl + return forward_call(*input, **kwargs) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1034, in forward + self._sync_buffers() + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1621, in _sync_buffers + self._sync_module_buffers(authoritative_rank) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1625, in _sync_module_buffers + self._default_broadcast_coalesced(authoritative_rank=authoritative_rank) + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1646, in _default_broadcast_coalesced + self._distributed_broadcast_coalesced( + File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1562, in _distributed_broadcast_coalesced + dist._broadcast_coalesced( +RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 30] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. 
Aborting appropriate communicators
+Process SpawnProcess-1:
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
+    self.run()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/multiprocessing/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1359, in main_worker
+    cls.trainer.run(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 290, in run
+    all_steps_are_invalid = cls.train_one_epoch(
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/train/trainer.py", line 516, in train_one_epoch
+    torch.distributed.all_reduce(iterator_stop, ReduceOp.SUM)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 1541, in all_reduce
+    work.wait()
+RuntimeError: NCCL communicator encountered error set by ProcessGroupNCCL: [Rank 28] Found key in store: NCCLABORTEDCOMM:20c6b5ac1c1765000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, from rank: 29. This means that rank has aborted its NCCL communicators previously and is not in a healthy state.. Aborting appropriate communicators
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 3 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+srun: error: gpub067: task 11: Exited with exit code 1
+srun: error: gpub022: task 4: Exited with exit code 1
+srun: error: gpub031: task 6: Exited with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+srun: error: gpub096: task 15: Exited with exit code 1
+srun: error: gpub032: task 7: Exited with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 3 terminated with exit code 1
+srun: error: gpub066: task 10: Exited with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 0 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 0 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 2 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 3 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 1 terminated with exit code 1
+Traceback (most recent call last):
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 196, in _run_module_as_main
+    return _run_code(code, main_globals, None,
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/runpy.py", line 86, in _run_code
+    exec(code, run_globals)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 23, in <module>
+    main()
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py", line 19, in main
+    S2TTask.main(cmd=cmd)
+  File "/scratch/bbjs/peng6/espnet-whisper-public/espnet2/tasks/abs_task.py", line 1104, in main
+    while not ProcessContext(processes, error_queues).join():
+  File "/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/lib/python3.10/site-packages/torch/multiprocessing/spawn.py", line 149, in join
+    raise ProcessExitedException(
+torch.multiprocessing.spawn.ProcessExitedException: process 3 terminated with exit code 1
+srun: error: gpub060: task 9: Exited with exit code 1
+srun: error: gpub076: task 12: Exited with exit code 1
+srun: error: gpub079: task 14: Exited with exit code 1
+srun: error: gpub077: task 13: Exited with exit code 1
+srun: error: gpub059: task 8: Exited with exit code 1
+srun: error: gpub030: task 5: Exited with exit code 1
+srun: error: gpub002: task 1: Exited with exit code 1
+srun: error: gpub016: task 3: Exited with exit code 1
+srun: error: gpub015: task 2: Exited with exit code 1
+# Accounting:
begin_time=1688441050 +# Accounting: end_time=1688483245 +# Accounting: time=42195 threads=1 +# Finished at Tue Jul 4 10:07:25 CDT 2023 with status 1 diff --git a/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.log b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.log new file mode 100644 index 0000000000000000000000000000000000000000..d2fb98012b3fefc71cd385147b7a24d9099386bb --- /dev/null +++ b/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.log @@ -0,0 +1,1294 @@ +# Running on gpub074.delta.ncsa.illinois.edu +# Started at Sun Jul 16 00:42:43 CDT 2023 +# SLURMD_NODENAME=gpub074 +# SLURM_CLUSTER_NAME=delta +# SLURM_CONF=/var/spool/slurmd/conf-cache/slurm.conf +# SLURM_CPUS_ON_NODE=64 +# SLURM_CPUS_PER_TASK=64 +# SLURM_EXPORT_ENV=PATH +# SLURM_GET_USER_ENV=1 +# SLURM_GPUS_ON_NODE=4 +# SLURM_GTIDS=0 +# SLURM_JOBID=2179250 +# SLURM_JOB_ACCOUNT=bbjs-delta-gpu +# SLURM_JOB_CPUS_PER_NODE=64 +# SLURM_JOB_GID=202 +# SLURM_JOB_GPUS=0,1,2,3 +# SLURM_JOB_ID=2179250 +# SLURM_JOB_NAME=exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/train.log +# SLURM_JOB_NODELIST=gpub074 +# SLURM_JOB_NUM_NODES=1 +# SLURM_JOB_PARTITION=gpuA40x4 +# SLURM_JOB_QOS=bbjs-delta-gpu +# SLURM_JOB_UID=68077 +# SLURM_JOB_USER=peng6 +# SLURM_LOCALID=0 +# SLURM_MEM_PER_NODE=240000 +# SLURM_NNODES=1 +# SLURM_NODEID=0 +# SLURM_NODELIST=gpub074 +# SLURM_NODE_ALIASES='(null)' +# SLURM_OPEN_MODE=a +# SLURM_PRIO_PROCESS=0 +# SLURM_PROCID=0 +# SLURM_SUBMIT_DIR=/scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v3/s2t1 +# SLURM_SUBMIT_HOST=dt-login02.delta.internal.ncsa.edu +# SLURM_TASKS_PER_NODE=1 +# SLURM_TASK_PID=4188774 +# SLURM_TOPOLOGY_ADDR=ss00.ss12.gpub074 +# SLURM_TOPOLOGY_ADDR_PATTERN=switch.switch.node +# SLURM_WORKING_CLUSTER=delta:dt-sched:6817:9728:109 +# python3 -m espnet2.bin.s2t_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file 
exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed True +/scratch/bbjs/peng6/espnet-whisper-public/tools/miniconda/envs/espnet/bin/python3 /scratch/bbjs/peng6/espnet-whisper-public/espnet2/bin/s2t_train.py --use_preprocessor true --bpemodel data/token_list/bpe_unigram50000/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram50000/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,kaldi_ark --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/speech_shape --resume true --fold_length 80000 --output_dir exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000 --config conf/train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/wav.scp,speech,kaldi_ark --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/speech_shape --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.prev,text_prev,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_prev_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text.ctc,text_ctc,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_ctc_shape.bpe --fold_length 150 --train_data_path_and_name_and_type exp/s2t_stats_raw_bpe50000/splits12/text,text,text --train_shape_file exp/s2t_stats_raw_bpe50000/splits12/text_shape.bpe --multiple_iterator true --valid_data_path_and_name_and_type dump/raw/dev/text.prev,text_prev,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_prev_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text.ctc,text_ctc,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_ctc_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/s2t_stats_raw_bpe50000/valid/text_shape.bpe --ngpu 4 --multiprocessing_distributed True +[gpub074:0/4] 2023-07-16 00:44:44,966 (distributed_c10d:319) INFO: Added key: store_based_barrier_key:1 to store for rank: 0 +[gpub074:0/4] 2023-07-16 00:44:44,967 (distributed_c10d:353) INFO: Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 4 nodes. 
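For context on the failure recorded in train.1.log above: once rank 29 aborted its NCCL communicators, every peer rank that next touched the process group (the DDP buffer broadcast in _sync_buffers, or the iterator_stop all_reduce) found the NCCLABORTEDCOMM key in the store and raised RuntimeError, and each node's parent process then surfaced the first nonzero worker exit as torch.multiprocessing.spawn.ProcessExitedException. Below is a minimal sketch of that last step only; it is not ESPnet code, and the worker body is a hypothetical stand-in for a training loop.

import sys

import torch.multiprocessing as mp


def worker(rank: int) -> None:
    # Hypothetical stand-in for a training worker: rank 1 exits nonzero,
    # the way a rank does after its NCCL communicator has been aborted.
    if rank == 1:
        sys.exit(1)


if __name__ == "__main__":
    try:
        # join=True (the default) waits on all workers and re-raises the
        # first failure it observes in the parent process.
        mp.spawn(worker, nprocs=4)
    except mp.ProcessExitedException as e:
        # error_index/exit_code correspond to the log's
        # "process N terminated with exit code 1" messages.
        print(f"process {e.error_index} terminated with exit code {e.exit_code}")

That parent-side exception is what each srun task propagates, which is why every node in the failed run above reports "Exited with exit code 1".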
+[gpub074:0/4] 2023-07-16 00:44:45,026 (s2t:483) INFO: Vocabulary size: 50002 +[gpub074:0/4] 2023-07-16 00:44:56,171 (abs_task:1201) INFO: pytorch.version=1.13.1, cuda.available=True, cudnn.version=8500, cudnn.benchmark=False, cudnn.deterministic=True +[gpub074:0/4] 2023-07-16 00:44:56,225 (abs_task:1202) INFO: Model structure: +ESPnetS2TModel( + (frontend): DefaultFrontend( + (stft): Stft(n_fft=512, win_length=400, hop_length=160, center=True, normalized=False, onesided=True) + (frontend): Frontend() + (logmel): LogMel(sr=16000, n_fft=512, n_mels=80, fmin=0, fmax=8000.0, htk=False) + ) + (specaug): SpecAug( + (freq_mask): MaskAlongAxis(mask_width_range=[0, 27], num_mask=2, axis=freq) + (time_mask): MaskAlongAxisVariableMaxWidth(mask_width_ratio_range=[0.0, 0.05], num_mask=10, axis=time) + ) + (normalize): GlobalMVN(stats_file=exp/s2t_stats_raw_bpe50000/train/feats_stats.npz, norm_means=True, norm_vars=True) + (encoder): TransformerEncoder( + (embed): Conv2dSubsampling( + (conv): Sequential( + (0): Conv2d(1, 1024, kernel_size=(3, 3), stride=(2, 2)) + (1): ReLU() + (2): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(2, 2)) + (3): ReLU() + ) + (out): Sequential( + (0): Linear(in_features=19456, out_features=1024, bias=True) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + ) + (encoders): MultiSequential( + (0): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, 
inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, 
bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (9): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (10): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (11): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): 
Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (12): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (13): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (14): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (15): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): 
Dropout(p=0.1, inplace=False) + ) + (16): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (17): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (18): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (19): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (20): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): 
Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (21): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (22): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (23): EncoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + ) + (decoder): TransformerDecoder( + (embed): Sequential( + (0): Embedding(50002, 1024) + (1): PositionalEncoding( + (dropout): Dropout(p=0.1, inplace=False) + ) + ) + (after_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (output_layer): Linear(in_features=1024, out_features=50002, bias=True) + (decoders): MultiSequential( + (0): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + 
(linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (1): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (2): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (3): DecoderLayer( + (self_attn): MultiHeadedAttention( + 
(linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (4): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (5): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), 
eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (6): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (7): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (8): DecoderLayer( + (self_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (src_attn): MultiHeadedAttention( + (linear_q): Linear(in_features=1024, out_features=1024, bias=True) + (linear_k): Linear(in_features=1024, out_features=1024, bias=True) + (linear_v): Linear(in_features=1024, out_features=1024, bias=True) + (linear_out): Linear(in_features=1024, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + ) + (feed_forward): PositionwiseFeedForward( + (w_1): Linear(in_features=1024, out_features=4096, bias=True) + (w_2): Linear(in_features=4096, out_features=1024, bias=True) + (dropout): Dropout(p=0.1, inplace=False) + (activation): ReLU() + ) + (norm1): 
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (9): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (10): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (11): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (12): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (13): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (14): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (15): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (16): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (17): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (18): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (19): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (20): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (21): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (22): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (23): DecoderLayer(
+        (self_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (src_attn): MultiHeadedAttention(
+          (linear_q): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_k): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_v): Linear(in_features=1024, out_features=1024, bias=True)
+          (linear_out): Linear(in_features=1024, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+        )
+        (feed_forward): PositionwiseFeedForward(
+          (w_1): Linear(in_features=1024, out_features=4096, bias=True)
+          (w_2): Linear(in_features=4096, out_features=1024, bias=True)
+          (dropout): Dropout(p=0.1, inplace=False)
+          (activation): ReLU()
+        )
+        (norm1): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm2): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (norm3): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+    )
+  )
+  (criterion_att): LabelSmoothingLoss(
+    (criterion): KLDivLoss()
+  )
+  (ctc): CTC(
+    (ctc_lo): Linear(in_features=1024, out_features=50002, bias=True)
+    (ctc_loss): CTCLoss()
+  )
+)
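The 24 printed DecoderLayer blocks are identical; each wires its submodules (self_attn, src_attn, feed_forward, norm1-3, dropout) into the usual residual pattern. A minimal sketch of that composition, assuming the pre-norm ("normalize_before") configuration that ESPnet transformer recipes typically use; the attention call signatures are simplified placeholders, not espnet2's exact API:

```python
import torch.nn as nn

class DecoderLayerSketch(nn.Module):
    """Pre-norm decoder block with the same submodules as the printout."""

    def __init__(self, size, self_attn, src_attn, feed_forward, dropout_rate=0.1):
        super().__init__()
        self.self_attn = self_attn        # MultiHeadedAttention, 1024 -> 1024
        self.src_attn = src_attn          # cross-attention over encoder output
        self.feed_forward = feed_forward  # position-wise FFN, 1024 -> 4096 -> 1024
        self.norm1 = nn.LayerNorm(size, eps=1e-12)
        self.norm2 = nn.LayerNorm(size, eps=1e-12)
        self.norm3 = nn.LayerNorm(size, eps=1e-12)
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, tgt, tgt_mask, memory, memory_mask):
        # Masked self-attention, then cross-attention, then FFN, each as a
        # residual branch around a LayerNorm applied before the sublayer.
        x = tgt + self.dropout(self.self_attn(self.norm1(tgt), tgt_mask))
        x = x + self.dropout(self.src_attn(self.norm2(x), memory, memory_mask))
        return x + self.dropout(self.feed_forward(self.norm3(x)))
```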
+
+Model summary:
+    Class Name: ESPnetS2TModel
+    Total Number of model parameters: 888.51 M
+    Number of trainable parameters: 888.51 M (100.0%)
+    Size: 3.55 GB
+    Type: torch.float32
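The summary's numbers are self-consistent: float32 parameters take 4 bytes each, so 888.51 M parameters occupy 888.51e6 x 4 B = 3.55 GB. A minimal sketch (not ESPnet's reporting code) that reproduces such a summary for any torch.nn.Module:

```python
import torch

def summarize(model: torch.nn.Module) -> None:
    """Print parameter counts and in-memory size like the log's Model summary."""
    params = list(model.parameters())
    total = sum(p.numel() for p in params)
    trainable = sum(p.numel() for p in params if p.requires_grad)
    # element_size() is 4 for torch.float32, hence 888.51 M params ~= 3.55 GB
    size_gb = sum(p.numel() * p.element_size() for p in params) / 1e9
    print(f"Total Number of model parameters: {total / 1e6:.2f} M")
    print(f"Number of trainable parameters: {trainable / 1e6:.2f} M "
          f"({100.0 * trainable / total:.1f}%)")
    print(f"Size: {size_gb:.2f} GB")
```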
+[gpub074:0/4] 2023-07-16 00:44:56,225 (abs_task:1205) INFO: Optimizer:
+AdamW (
+Parameter Group 0
+    amsgrad: False
+    betas: [0.9, 0.98]
+    capturable: False
+    eps: 1e-06
+    foreach: None
+    initial_lr: 0.00025
+    lr: 2.5e-08
+    maximize: False
+    weight_decay: 0.0
+)
+[gpub074:0/4] 2023-07-16 00:44:56,225 (abs_task:1206) INFO: Scheduler: WarmupLR(warmup_steps=10000)
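The printed lr of 2.5e-08 against an initial_lr of 0.00025 is expected at the start of warmup, not a misconfiguration. A sketch of the schedule implied by the log, assuming the Noam-style rule that espnet2's WarmupLR documents (not copied from the source):

```python
def warmup_lr(step: int, base_lr: float = 2.5e-4, warmup_steps: int = 10000) -> float:
    """Noam-style warmup: linear ramp to base_lr, then inverse-sqrt decay."""
    return base_lr * warmup_steps**0.5 * min(step**-0.5, step * warmup_steps**-1.5)

print(warmup_lr(1))      # 2.5e-08 -> matches the "lr" in the AdamW printout
print(warmup_lr(10000))  # 2.5e-04 -> the full base LR at the end of warmup
```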
+[gpub074:0/4] 2023-07-16 00:44:56,240 (abs_task:1215) INFO: Saving the configuration in exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/config.yaml
+[gpub074:0/4] 2023-07-16 00:44:56,958 (abs_task:1272) INFO: Loading pretrained params from /scratch/bbjs/peng6/espnet-whisper-public/egs2/mixed_v2/s2t1/exp/s2t_train_s2t_transformer_conv2d_size1024_e18_d18_lr5e-4_warmup20k_raw_bpe50000/valid.acc.ave.pth
+[gpub074:0/4] 2023-07-16 00:45:04,695 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub074:0/4] 2023-07-16 00:45:04,919 (abs_task:1570) INFO: [valid] dataset:
+ESPnetDataset(
+  speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"}
+  text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"}
+  text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"}
+  text: {"path": "dump/raw/dev/text", "type": "text"}
+  preprocess: )
+[gpub074:0/4] 2023-07-16 00:45:04,919 (abs_task:1571) INFO: [valid] Batch sampler: UnsortedBatchSampler(N-batch=1012, batch_size=128, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape,
+[gpub074:0/4] 2023-07-16 00:45:04,927 (abs_task:1572) INFO: [valid] mini-batch sizes summary: N-batch=1012, mean=128.1, min=128, max=129
+[gpub074:0/4] 2023-07-16 00:45:05,429 (s2t:454) INFO: Optional Data Names: ('text_prev', 'text_ctc', 'text_spk2', 'text_spk3', 'text_spk4')
+[gpub074:0/4] 2023-07-16 00:45:05,815 (abs_task:1570) INFO: [plot_att] dataset:
+ESPnetDataset(
+  speech: {"path": "dump/raw/dev/wav.scp", "type": "kaldi_ark"}
+  text_prev: {"path": "dump/raw/dev/text.prev", "type": "text"}
+  text_ctc: {"path": "dump/raw/dev/text.ctc", "type": "text"}
+  text: {"path": "dump/raw/dev/text", "type": "text"}
+  preprocess: )
+[gpub074:0/4] 2023-07-16 00:45:05,815 (abs_task:1571) INFO: [plot_att] Batch sampler: UnsortedBatchSampler(N-batch=129591, batch_size=1, key_file=exp/s2t_stats_raw_bpe50000/valid/speech_shape,
+[gpub074:0/4] 2023-07-16 00:45:05,815 (abs_task:1572) INFO: [plot_att] mini-batch sizes summary: N-batch=3, mean=1.0, min=1, max=1
+[gpub074:0/4] 2023-07-16 00:45:33,488 (trainer:159) INFO: The training was resumed using exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/checkpoint.pth
+[gpub074:0/4] 2023-07-16 00:45:33,492 (trainer:218) WARNING: The training has already reached at max_epoch: 56
+gpub074:4188818:4188818 [0] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0>
+gpub074:4188818:4188818 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub074:4188818:4188818 [0] NCCL INFO cudaDriverVersion 12010
+NCCL version 2.14.3+cuda11.7
+gpub074:4188819:4188819 [1] NCCL INFO cudaDriverVersion 12010
+gpub074:4188819:4188819 [1] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0>
+gpub074:4188819:4188819 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub074:4188819:4188945 [1] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0>
+gpub074:4188819:4188945 [1] NCCL INFO Using network IB
+gpub074:4188819:4188945 [1] NCCL INFO Setting affinity for GPU 1 to ffff,00000000
+gpub074:4188819:4188945 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0 [2] 2/-1/-1->1->0 [3] 2/-1/-1->1->0
+gpub074:4188819:4188945 [1] NCCL INFO Channel 00/0 : 1[46000] -> 2[85000] via P2P/IPC
+gpub074:4188819:4188945 [1] NCCL INFO Channel 01/0 : 1[46000] -> 2[85000] via P2P/IPC
+gpub074:4188819:4188945 [1] NCCL INFO Channel 02/0 : 1[46000] -> 2[85000] via P2P/IPC
+gpub074:4188819:4188945 [1] NCCL INFO Channel 03/0 : 1[46000] -> 2[85000] via P2P/IPC
+gpub074:4188819:4188945 [1] NCCL INFO Connected all rings
+gpub074:4188819:4188945 [1] NCCL INFO Channel 00/0 : 1[46000] -> 0[7000] via P2P/IPC
+gpub074:4188819:4188945 [1] NCCL INFO Channel 01/0 : 1[46000] -> 0[7000] via P2P/IPC
+gpub074:4188819:4188945 [1] NCCL INFO Channel 02/0 : 1[46000] -> 0[7000] via P2P/IPC
+gpub074:4188819:4188945 [1] NCCL INFO Channel 03/0 : 1[46000] -> 0[7000] via P2P/IPC
+gpub074:4188819:4188945 [1] NCCL INFO Connected all trees
+gpub074:4188819:4188945 [1] NCCL INFO threadThresholds 8/8/64 | 32/8/64 | 512 | 512
+gpub074:4188819:4188945 [1] NCCL INFO 4 coll channels, 4 p2p channels, 2 p2p channels per peer
+gpub074:4188819:4188945 [1] NCCL INFO comm 0xa71a630 rank 1 nranks 4 cudaDev 1 busId 46000 - Init COMPLETE
+gpub074:4188821:4188821 [3] NCCL INFO cudaDriverVersion 12010
+gpub074:4188821:4188821 [3] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0>
+gpub074:4188821:4188821 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub074:4188821:4188946 [3] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0>
+gpub074:4188821:4188946 [3] NCCL INFO Using network IB
+gpub074:4188821:4188946 [3] NCCL INFO Setting affinity for GPU 3 to ffff
+gpub074:4188821:4188946 [3] NCCL INFO Trees [0] -1/-1/-1->3->2 [1] -1/-1/-1->3->2 [2] -1/-1/-1->3->2 [3] -1/-1/-1->3->2
+gpub074:4188821:4188946 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 0[7000] via P2P/IPC
+gpub074:4188821:4188946 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 0[7000] via P2P/IPC
+gpub074:4188821:4188946 [3] NCCL INFO Channel 02/0 : 3[c7000] -> 0[7000] via P2P/IPC
+gpub074:4188821:4188946 [3] NCCL INFO Channel 03/0 : 3[c7000] -> 0[7000] via P2P/IPC
+gpub074:4188821:4188946 [3] NCCL INFO Connected all rings
+gpub074:4188821:4188946 [3] NCCL INFO Channel 00/0 : 3[c7000] -> 2[85000] via P2P/IPC
+gpub074:4188821:4188946 [3] NCCL INFO Channel 01/0 : 3[c7000] -> 2[85000] via P2P/IPC
+gpub074:4188821:4188946 [3] NCCL INFO Channel 02/0 : 3[c7000] -> 2[85000] via P2P/IPC
+gpub074:4188821:4188946 [3] NCCL INFO Channel 03/0 : 3[c7000] -> 2[85000] via P2P/IPC
+gpub074:4188821:4188946 [3] NCCL INFO Connected all trees
+gpub074:4188821:4188946 [3] NCCL INFO threadThresholds 8/8/64 | 32/8/64 | 512 | 512
+gpub074:4188821:4188946 [3] NCCL INFO 4 coll channels, 4 p2p channels, 2 p2p channels per peer
+gpub074:4188821:4188946 [3] NCCL INFO comm 0xa2e9cb0 rank 3 nranks 4 cudaDev 3 busId c7000 - Init COMPLETE
+gpub074:4188820:4188820 [2] NCCL INFO cudaDriverVersion 12010
+gpub074:4188820:4188820 [2] NCCL INFO Bootstrap : Using eth1:172.28.23.174<0>
+gpub074:4188820:4188820 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
+gpub074:4188820:4188947 [2] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0>
+gpub074:4188820:4188947 [2] NCCL INFO Using network IB
+gpub074:4188820:4188947 [2] NCCL INFO Setting affinity for GPU 2 to ffff0000
+gpub074:4188820:4188947 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1 [2] 3/-1/-1->2->1 [3] 3/-1/-1->2->1
+gpub074:4188820:4188947 [2] NCCL INFO Channel 00/0 : 2[85000] -> 3[c7000] via P2P/IPC
+gpub074:4188820:4188947 [2] NCCL INFO Channel 01/0 : 2[85000] -> 3[c7000] via P2P/IPC
+gpub074:4188820:4188947 [2] NCCL INFO Channel 02/0 : 2[85000] -> 3[c7000] via P2P/IPC
+gpub074:4188820:4188947 [2] NCCL INFO Channel 03/0 : 2[85000] -> 3[c7000] via P2P/IPC
+gpub074:4188820:4188947 [2] NCCL INFO Connected all rings
+gpub074:4188820:4188947 [2] NCCL INFO Channel 00/0 : 2[85000] -> 1[46000] via P2P/IPC
+gpub074:4188820:4188947 [2] NCCL INFO Channel 01/0 : 2[85000] -> 1[46000] via P2P/IPC
+gpub074:4188820:4188947 [2] NCCL INFO Channel 02/0 : 2[85000] -> 1[46000] via P2P/IPC
+gpub074:4188820:4188947 [2] NCCL INFO Channel 03/0 : 2[85000] -> 1[46000] via P2P/IPC
+gpub074:4188820:4188947 [2] NCCL INFO Connected all trees
+gpub074:4188820:4188947 [2] NCCL INFO threadThresholds 8/8/64 | 32/8/64 | 512 | 512
+gpub074:4188820:4188947 [2] NCCL INFO 4 coll channels, 4 p2p channels, 2 p2p channels per peer
+gpub074:4188820:4188947 [2] NCCL INFO comm 0x4f8c6c10 rank 2 nranks 4 cudaDev 2 busId 85000 - Init COMPLETE
+gpub074:4188819:4188953 [1] NCCL INFO [Service thread] Connection closed by localRank 1
+gpub074:4188819:4188819 [1] NCCL INFO comm 0xa71a630 rank 1 nranks 4 cudaDev 1 busId 46000 - Abort COMPLETE
+gpub074:4188821:4188955 [3] NCCL INFO [Service thread] Connection closed by localRank 3
+gpub074:4188821:4188821 [3] NCCL INFO comm 0xa2e9cb0 rank 3 nranks 4 cudaDev 3 busId c7000 - Abort COMPLETE
+gpub074:4188820:4188952 [2] NCCL INFO [Service thread] Connection closed by localRank 2
+gpub074:4188820:4188820 [2] NCCL INFO comm 0x4f8c6c10 rank 2 nranks 4 cudaDev 2 busId 85000 - Abort COMPLETE
+[gpub074:0/4] 2023-07-16 00:45:37,470 (trainer:458) INFO: The training was finished at 55 epochs
+[gpub074:0/4] 2023-07-16 00:45:37,508 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.acc": exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.acc.ave_5best.pth
+[gpub074:0/4] 2023-07-16 00:46:24,407 (average_nbest_models:69) INFO: Averaging 5best models: criterion="valid.total_count": exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.total_count.ave_5best.pth
+gpub074:4188818:4188944 [0] NCCL INFO NET/IB : Using [0]mlx5_0:1/RoCE [RO]; OOB eth1:172.28.23.174<0>
+gpub074:4188818:4188944 [0] NCCL INFO Using network IB
+gpub074:4188818:4188944 [0] NCCL INFO Setting affinity for GPU 0 to ffff0000,00000000
+gpub074:4188818:4188944 [0] NCCL INFO Channel 00/04 : 0 1 2 3
+gpub074:4188818:4188944 [0] NCCL INFO Channel 01/04 : 0 1 2 3
+gpub074:4188818:4188944 [0] NCCL INFO Channel 02/04 : 0 1 2 3
+gpub074:4188818:4188944 [0] NCCL INFO Channel 03/04 : 0 1 2 3
+gpub074:4188818:4188944 [0] NCCL INFO Trees [0] 1/-1/-1->0->-1 [1] 1/-1/-1->0->-1 [2] 1/-1/-1->0->-1 [3] 1/-1/-1->0->-1
+gpub074:4188818:4188944 [0] NCCL INFO Channel 00/0 : 0[7000] -> 1[46000] via P2P/IPC
+gpub074:4188818:4188944 [0] NCCL INFO Channel 01/0 : 0[7000] -> 1[46000] via P2P/IPC
+gpub074:4188818:4188944 [0] NCCL INFO Channel 02/0 : 0[7000] -> 1[46000] via P2P/IPC
+gpub074:4188818:4188944 [0] NCCL INFO Channel 03/0 : 0[7000] -> 1[46000] via P2P/IPC
+gpub074:4188818:4188944 [0] NCCL INFO Connected all rings
+gpub074:4188818:4188944 [0] NCCL INFO Connected all trees
+gpub074:4188818:4188944 [0] NCCL INFO threadThresholds 8/8/64 | 32/8/64 | 512 | 512
+gpub074:4188818:4188944 [0] NCCL INFO 4 coll channels, 4 p2p channels, 2 p2p channels per peer
+gpub074:4188818:4188944 [0] NCCL INFO comm 0x4f7f3ad0 rank 0 nranks 4 cudaDev 0 busId 7000 - Init COMPLETE
+gpub074:4188818:4188954 [0] NCCL INFO [Service thread] Connection closed by localRank 0
+gpub074:4188818:4188818 [0] NCCL INFO comm 0x4f7f3ad0 rank 0 nranks 4 cudaDev 0 busId 7000 - Abort COMPLETE
+# Accounting: begin_time=1689486163
+# Accounting: end_time=1689486432
+# Accounting: time=269 threads=1
+# Finished at Sun Jul 16 00:47:12 CDT 2023 with status 0
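Because the resumed checkpoint had already reached max_epoch, this job ran no training steps; its only substantive work in the 269 s of wall time was writing the two averaged checkpoints (valid.acc.ave_5best.pth and valid.total_count.ave_5best.pth) before exiting with status 0. N-best averaging simply takes the element-wise mean of the weights of the five checkpoints that scored best on the given validation criterion; a minimal sketch with hypothetical file names, not espnet2's average_nbest_models implementation:

```python
import torch

def average_checkpoints(paths: list[str]) -> dict:
    """Element-wise mean of the state dicts stored at the given paths."""
    avg = None
    for path in paths:
        state = torch.load(path, map_location="cpu")
        if avg is None:
            avg = {k: v.clone().float() for k, v in state.items()}
        else:
            for k in avg:
                avg[k] += state[k].float()
    for k in avg:
        avg[k] /= len(paths)
    return avg

# e.g., averaging the five epochs with the best valid.acc (paths hypothetical):
# avg = average_checkpoints([f"exp/.../{e}epoch.pth" for e in best_epochs])
# torch.save(avg, "exp/.../valid.acc.ave_5best.pth")
```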